From 8af60dae0911d6d087c9fc11f27b947f867589e2 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 24 Jun 2014 09:36:50 -0700 Subject: [PATCH 01/82] net: wireless: Increase scan entry expiration to fit new scan time Change-Id: I0e23ce45d78d7c17633670973f49943a5ed6032d Signed-off-by: Dmitry Shmidt --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 41b0f96a933f..3ebf125bc99e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -55,7 +55,7 @@ * also linked into the probe response struct. */ -#define IEEE80211_SCAN_RESULT_EXPIRE (3 * HZ) +#define IEEE80211_SCAN_RESULT_EXPIRE (7 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) { From f6f56efe7de93cd091ee456921a6043da370c22a Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 24 Jun 2014 13:19:46 -0700 Subject: [PATCH 02/82] net: wireless: Add NL80211_FLAG_NEED_WIPHY flag to vendor command Change-Id: I52ee3bc8a422c2a4c57cccccccd6ba3e721b4c01 Signed-off-by: Dmitry Shmidt --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ddb993fb0d38..150a38fc346e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9153,7 +9153,8 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_vendor_cmd, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, }; From 364a09b74ecbcee9411c3677212f0392d3c876de Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 23 Jul 2013 17:38:41 -0400 Subject: [PATCH 03/82] SELinux: Enable setting security contexts on rootfs inodes. rootfs (ramfs) can support setting of security contexts by userspace due to the vfs fallback behavior of calling the security module to set the in-core inode state for security.* attributes when the filesystem does not provide an xattr handler. No xattr handler required as the inodes are pinned in memory and have no backing store. This is useful in allowing early userspace to label individual files within a rootfs while still providing a policy-defined default via genfs. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: Eric Paris --- security/selinux/hooks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 44087388010c..a8de30bd733a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -422,6 +422,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root); From 99a6ea48b591877d1cd6a51732c40a1d5321d961 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Mon, 31 Mar 2014 16:23:51 +0900 Subject: [PATCH 04/82] net: core: Support UID-based routing. This contains the following commits: 1. cc2f522 net: core: Add a UID range to fib rules. 2. d7ed2bd net: core: Use the socket UID in routing lookups. 3. 2f9306a net: core: Add a RTA_UID attribute to routes. This is so that userspace can do per-UID route lookups. 4. 8e46efb net: ipv6: Use the UID in IPv6 PMTUD IPv4 PMTUD already does this because ipv4_sk_update_pmtu uses __build_flow_key, which includes the UID. Bug: 15413527 Change-Id: I81bd31dae655de9cce7d7a1f9a905dc1c2feba7c Signed-off-by: Lorenzo Colitti --- include/net/fib_rules.h | 6 +++- include/net/flow.h | 9 +++++- include/net/ip.h | 1 + include/net/ip6_route.h | 2 +- include/net/route.h | 5 +-- include/uapi/linux/fib_rules.h | 2 ++ include/uapi/linux/rtnetlink.h | 1 + net/core/fib_rules.c | 53 ++++++++++++++++++++++++++++++-- net/ipv4/fib_frontend.c | 1 + net/ipv4/inet_connection_sock.c | 6 ++-- net/ipv4/ip_output.c | 3 +- net/ipv4/ping.c | 3 +- net/ipv4/raw.c | 3 +- net/ipv4/route.c | 25 +++++++++++---- net/ipv4/syncookies.c | 3 +- net/ipv4/udp.c | 3 +- net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 2 +- net/ipv6/datagram.c | 1 + net/ipv6/esp6.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ipcomp6.c | 2 +- net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 12 ++++++-- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 1 + net/ipv6/udp.c | 1 + 29 files changed, 129 insertions(+), 26 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e361f4882426..4ac12e14c6d9 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + kuid_t uid_start; + kuid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -80,7 +82,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 628e11b98c58..c91e2aae3fb1 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -10,6 +10,7 @@ #include #include #include +#include struct flowi_common { int flowic_oif; @@ -23,6 +24,7 @@ struct flowi_common { #define FLOWI_FLAG_CAN_SLEEP 0x02 #define FLOWI_FLAG_KNOWN_NH 0x04 __u32 flowic_secid; + kuid_t flowic_uid; }; union flowi_uli { @@ -59,6 +61,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -78,7 +81,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -88,6 +92,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -115,6 +120,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -158,6 +164,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/ip.h b/include/net/ip.h index 509b88079270..02fc145ecc42 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -153,6 +153,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 260f83f16bcf..25b4500f28c9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -131,7 +131,7 @@ extern int rt6_route_rcv(struct net_device *dev, const struct in6_addr *gwaddr); extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark); + int oif, u32 mark, kuid_t uid); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); diff --git a/include/net/route.h b/include/net/route.h index 2ea40c1b5e00..b5b44875543e 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_i_uid(sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -253,7 +253,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 51da65b68b85..9dcdb6251cb8 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + FRA_UID_END, __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a2144e1afae..07c1146c1f51 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -297,6 +297,7 @@ enum rtattr_type_t { RTA_TABLE, RTA_MARK, RTA_MFC_STATS, + RTA_UID, __RTA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 0e9131195eb0..a40a876b8559 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -31,6 +31,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -179,6 +181,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline kuid_t fib_nl_uid(struct nlattr *nla) +{ + return make_kuid(current_user_ns(), nla_get_u32(nla)); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, kuid_t uid) +{ + return nla_put_u32(skb, idx, from_kuid_munged(current_user_ns(), uid)); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -193,6 +212,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -363,6 +385,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. */ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -469,6 +504,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -525,7 +568,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -579,7 +624,11 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, ((rule->mark_mask || rule->mark) && nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) || (rule->target && - nla_put_u32(skb, FRA_GOTO, rule->target))) + nla_put_u32(skb, FRA_GOTO, rule->target)) || + (uid_valid(rule->uid_start) && + nla_put_uid(skb, FRA_UID_START, rule->uid_start)) || + (uid_valid(rule->uid_end) && + nla_put_uid(skb, FRA_UID_END, rule->uid_end))) goto nla_put_failure; if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..ffffeb448ec4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -531,6 +531,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 442087d371f6..6dfec2f18214 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -422,7 +422,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, sk->sk_protocol, flags, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -458,7 +459,8 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c2ee385cecfb..8e20e9405582 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1503,7 +1503,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 111e5a409594..b83d82951cad 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -768,7 +768,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600c..b8287330c579 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -572,7 +572,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c04359196ebc..ef2fe9d2e9e6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -500,7 +500,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -516,11 +516,12 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_i_uid(sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, - const struct sock *sk) + struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; @@ -531,7 +532,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); } -static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) +static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; @@ -545,11 +546,12 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, + sock_i_uid(sk)); rcu_read_unlock(); } -static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, const struct sk_buff *skb) { if (skb) @@ -2287,6 +2289,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2336,6 +2343,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int err; int mark; struct sk_buff *skb; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2363,6 +2371,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2370,6 +2382,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5abb45e281be..c94032b95c60 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -353,7 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..35ab330ed958 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -962,7 +962,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a4cfde67fcb7..d29ae19ae698 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -694,6 +694,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index bb02e176cb70..b903e19463c9 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -630,7 +630,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..00b4a5f6eea4 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,6 +162,7 @@ ipv4_connected: fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 40ffd72243a4..fdc81cb29e80 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -449,7 +449,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 28da4003e842..12b1a942dc99 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -90,7 +90,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index f1493138d21e..65a46058c854 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -84,6 +84,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl6->flowi6_mark = inet_rsk(req)->ir_mark; fl6->fl6_dport = inet_rsk(req)->rmt_port; fl6->fl6_sport = inet_rsk(req)->loc_port; + fl6->flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); @@ -211,6 +212,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); final_p = fl6_update_dst(fl6, np->opt, &final); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7af5aee75d98..a1beb59a841e 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -78,7 +78,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, INVALID_UID); xfrm_state_put(x); } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d4edfceab36f..38ceca8a6358 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -159,6 +159,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..dfef31581f85 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -761,6 +761,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8ecf44af7c2e..bad36468dcd7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1099,7 +1099,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1112,6 +1112,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1123,7 +1124,7 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sock_i_uid(sk)); } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); @@ -2199,6 +2200,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2585,6 +2587,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 1efbc6f44a6a..ba8622daffd7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -243,6 +243,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6e882dadb4f8..a4fc647deb00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -252,6 +252,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..e6dd85da9062 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1147,6 +1147,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; From 7ae573c139c8a91bed58060818a2559526aee741 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Thu, 26 Jun 2014 09:26:21 -0700 Subject: [PATCH 05/82] net: wireless: Fix cfg80211_vendor_cmd_alloc_reply_skb Change-Id: Ia8da6cdacd5668d10f8955972d996177305b7228 Signed-off-by: Dmitry Shmidt --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 304e41381a1f..2ebb168e5a5b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3696,8 +3696,8 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp); static inline struct sk_buff * cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) { - return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE, - NL80211_ATTR_TESTDATA, approxlen); + return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR, + NL80211_ATTR_VENDOR_DATA, approxlen); } /** From 3c9e49908b882631f3737022e707ffddf0b7c230 Mon Sep 17 00:00:00 2001 From: Minsung Kim Date: Wed, 25 Jun 2014 19:44:50 +0900 Subject: [PATCH 06/82] cpufreq: fix sleeping in atomic context when realloc freq_table for all_time_in_state Commit 40cf2f8 (cpufreq: Persist cpufreq time in state data across hotplug) causes the following call trace to be spit on boot: BUG: sleeping function called from invalid context at mm/slub.c:936 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 3.10.9-20140624.172707-eng-gd6c0f69-dirty #50 Backtrace: [] (dump_backtrace+0x0/0x10c) from [] (show_stack+0x18/0x1c) r6:ffff1788 r5:c0c020c0 r4:e609c000 r3:00000000 [] (show_stack+0x0/0x1c) from [] (dump_stack+0x20/0x28) [] (dump_stack+0x0/0x28) from [] (__might_sleep+0x104/0x120) [] (__might_sleep+0x0/0x120) from [] (__kmalloc_track_caller+0x144/0x274) r6:00000000 r5:e609c000 r4:e6802140 [] (__kmalloc_track_caller+0x0/0x274) from [] (krealloc+0x58/0xb0) [] (krealloc+0x0/0xb0) from [] (cpufreq_allstats_create+0x120/0x204) r8:e4c4ff00 r7:c0d266b8 r6:0013d620 r5:e4c4e600 r4:00000001 r3:e535d6d0 [] (cpufreq_allstats_create+0x0/0x204) from [] (cpufreq_stat_notifier_policy+0xb8/0xd0) [] (cpufreq_stat_notifier_policy+0x0/0xd0) from [] (notifier_call_chain+0x4c/0x8c) r5:00000000 r4:fffffffe [] (notifier_call_chain+0x0/0x8c) from [] (__blocking_notifier_call_chain+0x50/0x68) r8:c0cd4d00 r7:00000002 r6:e609dd7c r5:ffffffff r4:c0d25a4c r3:ffffffff [] (__blocking_notifier_call_chain+0x0/0x68) from [] (blocking_notifier_call_chain+0x20/0x28) r7:c0e24f30 r6:00000000 r5:e53e1e00 r4:e609dd7c [] (blocking_notifier_call_chain+0x0/0x28) from [] (__cpufreq_set_policy+0xc0/0x1d0) [] (__cpufreq_set_policy+0x0/0x1d0) from [] (cpufreq_add_dev_interface+0x20c/0x270) r7:00000008 r6:00000000 r5:e53e1e00 r4:e53e1e58 [] (cpufreq_add_dev_interface+0x0/0x270) from [] (cpufreq_add_dev+0x33c/0x420) [] (cpufreq_add_dev+0x0/0x420) from [] (subsys_interface_register+0x80/0xbc) [] (subsys_interface_register+0x0/0xbc) from [] (cpufreq_register_driver+0x8c/0x194) Change-Id: If77a656d0ea60a8fc4083283d104509fa6c07f8f Signed-off-by: Minsung Kim --- drivers/cpufreq/cpufreq_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 3f5e279ff9d8..7a2bcac3ad7f 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -424,7 +424,7 @@ static void add_all_freq_table(unsigned int freq) unsigned int size; size = sizeof(unsigned int) * (all_freq_table->table_size + 1); all_freq_table->freq_table = krealloc(all_freq_table->freq_table, - size, GFP_KERNEL); + size, GFP_ATOMIC); if (IS_ERR(all_freq_table->freq_table)) { pr_warn("Could not reallocate memory for freq_table\n"); all_freq_table->freq_table = NULL; From 8d0e99be244a647f8f0dd8741238446088e1c30f Mon Sep 17 00:00:00 2001 From: Yann Soubeyrand Date: Wed, 18 Jun 2014 14:57:29 +0200 Subject: [PATCH 07/82] net: wireless: fix misplaced #endif in net/wireless/nl80211.c The patch "nl80211: cumulative vendor command support patch" introduced compilation error in file net/wireless/nl80211.c. The nl80211_vendor_mcgrp variable is defined only if the CONFIG_NL80211_TESTMODE preprocessor constant is defined. However, this variable is later used wether CONFIG_NL80211_TESTMODE is defined or not. The cause is a misplaced #endif. Change-Id: I466488285578d57e6554a1f8ebe71d4f3385ecf2 Signed-off-by: Dmitry Shmidt --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 150a38fc346e..ec1356fab67e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10973,11 +10973,11 @@ int nl80211_init(void) err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp); if (err) goto err_out; -#endif err = genl_register_mc_group(&nl80211_fam, &nl80211_vendor_mcgrp); if (err) goto err_out; +#endif err = netlink_register_notifier(&nl80211_netlink_notifier); if (err) From dd979cc254c3aeec85927dbaad669d1ecd193c6f Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 1 Jul 2014 14:48:15 -0700 Subject: [PATCH 08/82] net: cfg80211: Fix wiphy_vendor_command 'doit' type Change-Id: I5b1732eed7ac4f6bc267b4baa2153f6de2e16dc8 Signed-off-by: Dmitry Shmidt --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2ebb168e5a5b..d9681a288ce6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2497,7 +2497,7 @@ struct wiphy_vendor_command { struct nl80211_vendor_cmd_info info; u32 flags; int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev, - void *data, int data_len); + const void *data, int data_len); }; /** From 70bcc368f145d0871379f5ffe0428e65e10345e1 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 27 Jun 2014 16:39:35 -0700 Subject: [PATCH 09/82] input: Made keyreset more robust Switched do_restart to run in a seperate workqueue to handle cases where kernel_restart hangs. Change-Id: I1ecd61f8d0859f1a86d37c692351d644b5db9c69 Signed-off-by: Daniel Rosenberg --- drivers/input/keyreset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyreset.c b/drivers/input/keyreset.c index eaaccde82210..7fbf7247e65f 100644 --- a/drivers/input/keyreset.c +++ b/drivers/input/keyreset.c @@ -27,9 +27,10 @@ struct keyreset_state { int restart_requested; int (*reset_fn)(void); struct platform_device *pdev_child; + struct work_struct restart_work; }; -static void do_restart(void) +static void do_restart(struct work_struct *unused) { sys_sync(); kernel_restart(NULL); @@ -44,7 +45,7 @@ static void do_reset_fn(void *priv) state->restart_requested = state->reset_fn(); } else { pr_info("keyboard reset\n"); - do_restart(); + schedule_work(&state->restart_work); state->restart_requested = 1; } } @@ -69,6 +70,7 @@ static int keyreset_probe(struct platform_device *pdev) if (!state->pdev_child) return -ENOMEM; state->pdev_child->dev.parent = &pdev->dev; + INIT_WORK(&state->restart_work, do_restart); keyp = pdata->keys_down; while ((key = *keyp++)) { From 455b09d66a9ccfc572497ae88375ae343ff9ae66 Mon Sep 17 00:00:00 2001 From: Sreeram Ramachandran Date: Tue, 8 Jul 2014 11:57:14 -0700 Subject: [PATCH 10/82] Handle 'sk' being NULL in UID-based routing. Bug: 15413527 Change-Id: Iab1fae9da6053b284591628ef1de878761b137b1 Signed-off-by: Sreeram Ramachandran --- include/net/route.h | 2 +- net/ipv4/route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index b5b44875543e..647bb2adbffd 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -142,7 +142,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport, sock_i_uid(sk)); + daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ef2fe9d2e9e6..42cd979d1633 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -517,7 +517,7 @@ static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, RT_SCOPE_UNIVERSE, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, - sock_i_uid(sk)); + sk ? sock_i_uid(sk) : 0); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, From f8fe2735daaf662876d1333075991997c04d5359 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 23 Jun 2014 19:07:44 +0800 Subject: [PATCH 11/82] usb: gadget: f_accessory: Enabled Zero Length Packet (ZLP) for acc_write Accessory connected to Android Device requires Zero Length Packet (ZLP) to be written when data transferred out from the Android device are multiples of wMaxPacketSize (64bytes (Full-Speed) / 512bytes (High-Speed)) to end the transfer. Change-Id: Ib2c2c0ab98ef9afa10e74a720142deca5c0ed476 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_accessory.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_accessory.c b/drivers/usb/gadget/f_accessory.c index 53e50b5e8612..a401acdceb4d 100644 --- a/drivers/usb/gadget/f_accessory.c +++ b/drivers/usb/gadget/f_accessory.c @@ -662,10 +662,17 @@ static ssize_t acc_write(struct file *fp, const char __user *buf, break; } - if (count > BULK_BUFFER_SIZE) + if (count > BULK_BUFFER_SIZE) { xfer = BULK_BUFFER_SIZE; - else + /* ZLP, They will be more TX requests so not yet. */ + req->zero = 0; + } else { xfer = count; + /* If the data length is a multple of the + * maxpacket size then send a zero length packet(ZLP). + */ + req->zero = ((xfer % dev->ep_in->maxpacket) == 0); + } if (copy_from_user(req->buf, buf, xfer)) { r = -EFAULT; break; From 2ca27aef6f907c33d85915df5f37ddee32a92742 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:29:55 +0200 Subject: [PATCH 12/82] HID: validate HID report id size The "Report ID" field of a HID report is used to build indexes of reports. The kernel's index of these is limited to 256 entries, so any malicious device that sets a Report ID greater than 255 will trigger memory corruption on the host: [ 1347.156239] BUG: unable to handle kernel paging request at ffff88094958a878 [ 1347.156261] IP: [] hid_register_report+0x2a/0x8b CVE-2013-2888 Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 10 +++++++--- include/linux/hid.h | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 264f55099940..95a97a44fe74 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -63,6 +63,8 @@ struct hid_report *hid_register_report(struct hid_device *device, unsigned type, struct hid_report_enum *report_enum = device->report_enum + type; struct hid_report *report; + if (id >= HID_MAX_IDS) + return NULL; if (report_enum->report_id_hash[id]) return report_enum->report_id_hash[id]; @@ -404,8 +406,10 @@ static int hid_parser_global(struct hid_parser *parser, struct hid_item *item) case HID_GLOBAL_ITEM_TAG_REPORT_ID: parser->global.report_id = item_udata(item); - if (parser->global.report_id == 0) { - hid_err(parser->device, "report_id 0 is invalid\n"); + if (parser->global.report_id == 0 || + parser->global.report_id >= HID_MAX_IDS) { + hid_err(parser->device, "report_id %u is invalid\n", + parser->global.report_id); return -1; } return 0; @@ -575,7 +579,7 @@ static void hid_close_report(struct hid_device *device) for (i = 0; i < HID_REPORT_TYPES; i++) { struct hid_report_enum *report_enum = device->report_enum + i; - for (j = 0; j < 256; j++) { + for (j = 0; j < HID_MAX_IDS; j++) { struct hid_report *report = report_enum->report_id_hash[j]; if (report) hid_free_report(report); diff --git a/include/linux/hid.h b/include/linux/hid.h index 8136c6d99037..95b6ddf0fa45 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -393,10 +393,12 @@ struct hid_report { struct hid_device *device; /* associated device */ }; +#define HID_MAX_IDS 256 + struct hid_report_enum { unsigned numbered; struct list_head report_list; - struct hid_report *report_id_hash[256]; + struct hid_report *report_id_hash[HID_MAX_IDS]; }; #define HID_REPORT_TYPES 3 From b8b84374b4876b2a62187102392f72a8beaf217e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:50 +0200 Subject: [PATCH 13/82] HID: provide a helper for validating hid reports Many drivers need to validate the characteristics of their HID report during initialization to avoid misusing the reports. This adds a common helper to perform validation of the report exisitng, the field existing, and the expected number of values within the field. Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 58 ++++++++++++++++++++++++++++++++++++++++++ include/linux/hid.h | 4 +++ 2 files changed, 62 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 95a97a44fe74..c6d42deb9163 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -759,6 +759,64 @@ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size) } EXPORT_SYMBOL_GPL(hid_parse_report); +static const char * const hid_report_names[] = { + "HID_INPUT_REPORT", + "HID_OUTPUT_REPORT", + "HID_FEATURE_REPORT", +}; +/** + * hid_validate_values - validate existing device report's value indexes + * + * @device: hid device + * @type: which report type to examine + * @id: which report ID to examine (0 for first) + * @field_index: which report field to examine + * @report_counts: expected number of values + * + * Validate the number of values in a given field of a given report, after + * parsing. + */ +struct hid_report *hid_validate_values(struct hid_device *hid, + unsigned int type, unsigned int id, + unsigned int field_index, + unsigned int report_counts) +{ + struct hid_report *report; + + if (type > HID_FEATURE_REPORT) { + hid_err(hid, "invalid HID report type %u\n", type); + return NULL; + } + + if (id >= HID_MAX_IDS) { + hid_err(hid, "invalid HID report id %u\n", id); + return NULL; + } + + /* + * Explicitly not using hid_get_report() here since it depends on + * ->numbered being checked, which may not always be the case when + * drivers go to access report values. + */ + report = hid->report_enum[type].report_id_hash[id]; + if (!report) { + hid_err(hid, "missing %s %u\n", hid_report_names[type], id); + return NULL; + } + if (report->maxfield <= field_index) { + hid_err(hid, "not enough fields in %s %u\n", + hid_report_names[type], id); + return NULL; + } + if (report->field[field_index]->report_count < report_counts) { + hid_err(hid, "not enough values in %s %u field %u\n", + hid_report_names[type], id, field_index); + return NULL; + } + return report; +} +EXPORT_SYMBOL_GPL(hid_validate_values); + /** * hid_open_report - open a driver-specific device report * diff --git a/include/linux/hid.h b/include/linux/hid.h index 95b6ddf0fa45..1f3c5f7b3bc5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -749,6 +749,10 @@ void hid_output_report(struct hid_report *report, __u8 *data); struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +struct hid_report *hid_validate_values(struct hid_device *hid, + unsigned int type, unsigned int id, + unsigned int field_index, + unsigned int report_counts); int hid_open_report(struct hid_device *device); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); From 728a564fa767d186ea1f95fbf81b43e9d865dc0b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:51 +0200 Subject: [PATCH 14/82] HID: zeroplus: validate output report details The zeroplus HID driver was not checking the size of allocated values in fields it used. A HID device could send a malicious output report that would cause the driver to write beyond the output report allocation during initialization, causing a heap overflow: [ 1442.728680] usb 1-1: New USB device found, idVendor=0c12, idProduct=0005 ... [ 1466.243173] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten CVE-2013-2889 Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-zpff.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c index 6ec28a37c146..a29756c6ca02 100644 --- a/drivers/hid/hid-zpff.c +++ b/drivers/hid/hid-zpff.c @@ -68,21 +68,13 @@ static int zpff_init(struct hid_device *hid) struct hid_report *report; struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = - &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - int error; + int i, error; - if (list_empty(report_list)) { - hid_err(hid, "no output report found\n"); - return -ENODEV; - } - - report = list_entry(report_list->next, struct hid_report, list); - - if (report->maxfield < 4) { - hid_err(hid, "not enough fields in report\n"); - return -ENODEV; + for (i = 0; i < 4; i++) { + report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1); + if (!report) + return -ENODEV; } zpff = kzalloc(sizeof(struct zpff_device), GFP_KERNEL); From 7e62de4584a6ad505e164ae92c9c1571da46c9bb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:53 +0200 Subject: [PATCH 15/82] HID: steelseries: validate output report details A HID device could send a malicious output report that would cause the steelseries HID driver to write beyond the output report allocation during initialization, causing a heap overflow: [ 167.981534] usb 1-1: New USB device found, idVendor=1038, idProduct=1410 ... [ 182.050547] BUG kmalloc-256 (Tainted: G W ): Redzone overwritten CVE-2013-2891 Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-steelseries.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-steelseries.c b/drivers/hid/hid-steelseries.c index d16491192112..29f328f411fb 100644 --- a/drivers/hid/hid-steelseries.c +++ b/drivers/hid/hid-steelseries.c @@ -249,6 +249,11 @@ static int steelseries_srws1_probe(struct hid_device *hdev, goto err_free; } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 16)) { + ret = -ENODEV; + goto err_free; + } + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); From e3c850750a74e04855fc61e9f5d2854eeca8087c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:30:49 +0200 Subject: [PATCH 16/82] HID: pantherlord: validate output report details A HID device could send a malicious output report that would cause the pantherlord HID driver to write beyond the output report allocation during initialization, causing a heap overflow: [ 310.939483] usb 1-1: New USB device found, idVendor=0e8f, idProduct=0003 ... [ 315.980774] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten CVE-2013-2892 Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/hid-pl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-pl.c b/drivers/hid/hid-pl.c index d29112fa5cd5..2dcd7d98dbd6 100644 --- a/drivers/hid/hid-pl.c +++ b/drivers/hid/hid-pl.c @@ -132,8 +132,14 @@ static int plff_init(struct hid_device *hid) strong = &report->field[0]->value[2]; weak = &report->field[0]->value[3]; debug("detected single-field device"); - } else if (report->maxfield >= 4 && report->field[0]->maxusage == 1 && - report->field[0]->usage[0].hid == (HID_UP_LED | 0x43)) { + } else if (report->field[0]->maxusage == 1 && + report->field[0]->usage[0].hid == + (HID_UP_LED | 0x43) && + report->maxfield >= 4 && + report->field[0]->report_count >= 1 && + report->field[1]->report_count >= 1 && + report->field[2]->report_count >= 1 && + report->field[3]->report_count >= 1) { report->field[0]->value[0] = 0x00; report->field[1]->value[0] = 0x00; strong = &report->field[2]->value[0]; From d87aff426add54f65131d450a06db618742a7dfe Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:54 +0200 Subject: [PATCH 17/82] HID: LG: validate HID output report details A HID device could send a malicious output report that would cause the lg, lg3, and lg4 HID drivers to write beyond the output report allocation during an event, causing a heap overflow: [ 325.245240] usb 1-1: New USB device found, idVendor=046d, idProduct=c287 ... [ 414.518960] BUG kmalloc-4096 (Not tainted): Redzone overwritten Additionally, while lg2 did correctly validate the report details, it was cleaned up and shortened. CVE-2013-2893 Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-lg2ff.c | 19 +++---------------- drivers/hid/hid-lg3ff.c | 29 ++++++----------------------- drivers/hid/hid-lg4ff.c | 20 +------------------- drivers/hid/hid-lgff.c | 17 ++--------------- 4 files changed, 12 insertions(+), 73 deletions(-) diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c index b3cd1507dda2..1a42eaa6ca02 100644 --- a/drivers/hid/hid-lg2ff.c +++ b/drivers/hid/hid-lg2ff.c @@ -64,26 +64,13 @@ int lg2ff_init(struct hid_device *hid) struct hid_report *report; struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = - &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; int error; - if (list_empty(report_list)) { - hid_err(hid, "no output report found\n"); + /* Check that the report looks ok */ + report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7); + if (!report) return -ENODEV; - } - - report = list_entry(report_list->next, struct hid_report, list); - - if (report->maxfield < 1) { - hid_err(hid, "output report is empty\n"); - return -ENODEV; - } - if (report->field[0]->report_count < 7) { - hid_err(hid, "not enough values in the field\n"); - return -ENODEV; - } lg2ff = kmalloc(sizeof(struct lg2ff_device), GFP_KERNEL); if (!lg2ff) diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c index e52f181f6aa1..8c2da183d3bc 100644 --- a/drivers/hid/hid-lg3ff.c +++ b/drivers/hid/hid-lg3ff.c @@ -66,10 +66,11 @@ static int hid_lg3ff_play(struct input_dev *dev, void *data, int x, y; /* - * Maxusage should always be 63 (maximum fields) - * likely a better way to ensure this data is clean + * Available values in the field should always be 63, but we only use up to + * 35. Instead, clear the entire area, however big it is. */ - memset(report->field[0]->value, 0, sizeof(__s32)*report->field[0]->maxusage); + memset(report->field[0]->value, 0, + sizeof(__s32) * report->field[0]->report_count); switch (effect->type) { case FF_CONSTANT: @@ -129,32 +130,14 @@ static const signed short ff3_joystick_ac[] = { int lg3ff_init(struct hid_device *hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; const signed short *ff_bits = ff3_joystick_ac; int error; int i; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - if (!report) { - hid_err(hid, "NULL output report\n"); - return -1; - } - - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35)) + return -ENODEV; /* Assume single fixed device G940 */ for (i = 0; ff_bits[i] >= 0; i++) diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 0ddae2a00d59..8782fe1aaa07 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -484,34 +484,16 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde int lg4ff_init(struct hid_device *hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; struct lg4ff_device_entry *entry; struct lg_drv_data *drv_data; struct usb_device_descriptor *udesc; int error, i, j; __u16 bcdDevice, rev_maj, rev_min; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - if (!report) { - hid_err(hid, "NULL output report\n"); + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; - } - - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } /* Check what wheel has been connected */ for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) { diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c index d7ea8c845b40..e1394af0ae7b 100644 --- a/drivers/hid/hid-lgff.c +++ b/drivers/hid/hid-lgff.c @@ -128,27 +128,14 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude) int lgff_init(struct hid_device* hid) { struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev = hidinput->input; - struct hid_report *report; - struct hid_field *field; const signed short *ff_bits = ff_joystick; int error; int i; - /* Find the report to use */ - if (list_empty(report_list)) { - hid_err(hid, "No output report found\n"); - return -1; - } - /* Check that the report looks ok */ - report = list_entry(report_list->next, struct hid_report, list); - field = report->field[0]; - if (!field) { - hid_err(hid, "NULL field\n"); - return -1; - } + if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) + return -ENODEV; for (i = 0; i < ARRAY_SIZE(devices); i++) { if (dev->id.vendor == devices[i].idVendor && From 255b0df27b1f2b590226f29a78169cc529e74cf0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:55 +0200 Subject: [PATCH 18/82] HID: lenovo-tpkbd: validate output report details A HID device could send a malicious output report that would cause the lenovo-tpkbd HID driver to write just beyond the output report allocation during initialization, causing a heap overflow: [ 76.109807] usb 1-1: New USB device found, idVendor=17ef, idProduct=6009 ... [ 80.462540] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten CVE-2013-2894 Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-lenovo-tpkbd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-lenovo-tpkbd.c b/drivers/hid/hid-lenovo-tpkbd.c index 07837f5a4eb8..762d988548a2 100644 --- a/drivers/hid/hid-lenovo-tpkbd.c +++ b/drivers/hid/hid-lenovo-tpkbd.c @@ -339,7 +339,15 @@ static int tpkbd_probe_tp(struct hid_device *hdev) struct tpkbd_data_pointer *data_pointer; size_t name_sz = strlen(dev_name(dev)) + 16; char *name_mute, *name_micmute; - int ret; + int i, ret; + + /* Validate required reports. */ + for (i = 0; i < 4; i++) { + if (!hid_validate_values(hdev, HID_FEATURE_REPORT, 4, i, 1)) + return -ENODEV; + } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 3, 0, 2)) + return -ENODEV; if (sysfs_create_group(&hdev->dev.kobj, &tpkbd_attr_group_pointer)) { From 142ae08aba82c8226779d6cc8cddc95a83923d77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Sep 2013 21:56:56 +0200 Subject: [PATCH 19/82] HID: logitech-dj: validate output report details A HID device could send a malicious output report that would cause the logitech-dj HID driver to leak kernel memory contents to the device, or trigger a NULL dereference during initialization: [ 304.424553] usb 1-1: New USB device found, idVendor=046d, idProduct=c52b ... [ 304.780467] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 [ 304.781409] IP: [] logi_dj_recv_send_report.isra.11+0x1a/0x90 CVE-2013-2895 Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 5207591a598c..0522b80eab5a 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -421,7 +421,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, struct hid_report *report; struct hid_report_enum *output_report_enum; u8 *data = (u8 *)(&dj_report->device_index); - int i; + unsigned int i; output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT]; report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT]; @@ -431,7 +431,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev, return -ENODEV; } - for (i = 0; i < report->field[0]->report_count; i++) + for (i = 0; i < DJREPORT_SHORT_LENGTH - 1; i++) report->field[0]->value[i] = data[i]; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); @@ -738,6 +738,12 @@ static int logi_dj_probe(struct hid_device *hdev, goto hid_parse_fail; } + if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, REPORT_ID_DJ_SHORT, + 0, DJREPORT_SHORT_LENGTH - 1)) { + retval = -ENODEV; + goto hid_parse_fail; + } + /* Starts the usb device and connects to upper interfaces hiddev and * hidraw */ retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); From 03507a663cef349083e8d1abc4b9e9b1970a16e8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:28 +0200 Subject: [PATCH 20/82] HID: ntrig: validate feature report details A HID device could send a malicious feature report that would cause the ntrig HID driver to trigger a NULL dereference during initialization: [57383.031190] usb 3-1: New USB device found, idVendor=1b96, idProduct=0001 ... [57383.315193] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [57383.315308] IP: [] ntrig_probe+0x25e/0x420 [hid_ntrig] CVE-2013-2896 Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: Rafi Rubin Signed-off-by: Jiri Kosina --- drivers/hid/hid-ntrig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ntrig.c b/drivers/hid/hid-ntrig.c index ef95102515e4..5482156ab4de 100644 --- a/drivers/hid/hid-ntrig.c +++ b/drivers/hid/hid-ntrig.c @@ -115,7 +115,8 @@ static inline int ntrig_get_mode(struct hid_device *hdev) struct hid_report *report = hdev->report_enum[HID_FEATURE_REPORT]. report_id_hash[0x0d]; - if (!report) + if (!report || report->maxfield < 1 || + report->field[0]->report_count < 1) return -EINVAL; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); From 9b24c9c51653d4400d9a4aea2dc28124dce2893c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 11 Sep 2013 21:56:57 +0200 Subject: [PATCH 21/82] HID: validate feature and input report details When dealing with usage_index, be sure to properly use unsigned instead of int to avoid overflows. When working on report fields, always validate that their report_counts are in bounds. Without this, a HID device could report a malicious feature report that could trick the driver into a heap overflow: [ 634.885003] usb 1-1: New USB device found, idVendor=0596, idProduct=0500 ... [ 676.469629] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten CVE-2013-2897 Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Acked-by: Kees Cook Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 16 +++++++--------- drivers/hid/hid-input.c | 11 ++++++++++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index c6d42deb9163..eadcc85e5a62 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -94,7 +94,6 @@ EXPORT_SYMBOL_GPL(hid_register_report); static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values) { struct hid_field *field; - int i; if (report->maxfield == HID_MAX_FIELDS) { hid_err(report->device, "too many fields in report\n"); @@ -113,9 +112,6 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned field->value = (s32 *)(field->usage + usages); field->report = report; - for (i = 0; i < usages; i++) - field->usage[i].usage_index = i; - return field; } @@ -226,9 +222,9 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign { struct hid_report *report; struct hid_field *field; - int usages; + unsigned usages; unsigned offset; - int i; + unsigned i; report = hid_register_report(parser->device, report_type, parser->global.report_id); if (!report) { @@ -255,7 +251,8 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign if (!parser->local.usage_index) /* Ignore padding fields */ return 0; - usages = max_t(int, parser->local.usage_index, parser->global.report_count); + usages = max_t(unsigned, parser->local.usage_index, + parser->global.report_count); field = hid_register_field(report, usages, parser->global.report_count); if (!field) @@ -266,13 +263,14 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign field->application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION); for (i = 0; i < usages; i++) { - int j = i; + unsigned j = i; /* Duplicate the last usage we parsed if we have excess values */ if (i >= parser->local.usage_index) j = parser->local.usage_index - 1; field->usage[i].hid = parser->local.usage[j]; field->usage[i].collection_index = parser->local.collection_index[j]; + field->usage[i].usage_index = i; } field->maxusage = usages; @@ -1290,7 +1288,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, goto out; } - if (hid->claimed != HID_CLAIMED_HIDRAW) { + if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) { for (a = 0; a < report->maxfield; a++) hid_input_field(hid, report->field[a], cdata, interrupt); hdrv = hid->driver; diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 2df9cdcba0de..762b2cff72b1 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -477,6 +477,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; + /* Ignore if report count is out of bounds. */ + if (field->report_count < 1) + goto ignore; + /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { @@ -1172,7 +1176,11 @@ static void report_features(struct hid_device *hid) rep_enum = &hid->report_enum[HID_FEATURE_REPORT]; list_for_each_entry(rep, &rep_enum->report_list, list) - for (i = 0; i < rep->maxfield; i++) + for (i = 0; i < rep->maxfield; i++) { + /* Ignore if report count is out of bounds. */ + if (rep->field[i]->report_count < 1) + continue; + for (j = 0; j < rep->field[i]->maxusage; j++) { /* Verify if Battery Strength feature is available */ hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i]); @@ -1181,6 +1189,7 @@ static void report_features(struct hid_device *hid) drv->feature_mapping(hid, rep->field[i], rep->field[i]->usage + j); } + } } static struct hid_input *hidinput_allocate(struct hid_device *hid) From 8d6fa24bc8f6f4441c4bb29f6b7b44393a5c9319 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 11 Sep 2013 21:56:58 +0200 Subject: [PATCH 22/82] HID: multitouch: validate indexes details When working on report indexes, always validate that they are in bounds. Without this, a HID device could report a malicious feature report that could trick the driver into a heap overflow: [ 634.885003] usb 1-1: New USB device found, idVendor=0596, idProduct=0500 ... [ 676.469629] BUG kmalloc-192 (Tainted: G W ): Redzone overwritten Note that we need to change the indexes from s8 to s16 as they can be between -1 and 255. CVE-2013-2897 Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Acked-by: Kees Cook Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index f4b77f4d5fb8..bb6fe3ee0030 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -101,9 +101,9 @@ struct mt_device { unsigned last_slot_field; /* the last field of a slot */ unsigned mt_report_id; /* the report ID of the multitouch device */ unsigned pen_report_id; /* the report ID of the pen device */ - __s8 inputmode; /* InputMode HID feature, -1 if non-existent */ - __s8 inputmode_index; /* InputMode HID feature index in the report */ - __s8 maxcontact_report_id; /* Maximum Contact Number HID feature, + __s16 inputmode; /* InputMode HID feature, -1 if non-existent */ + __s16 inputmode_index; /* InputMode HID feature index in the report */ + __s16 maxcontact_report_id; /* Maximum Contact Number HID feature, -1 if non-existent */ __u8 num_received; /* how many contacts we received */ __u8 num_expected; /* expected last contact index */ @@ -317,20 +317,18 @@ static void mt_feature_mapping(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage) { struct mt_device *td = hid_get_drvdata(hdev); - int i; switch (usage->hid) { case HID_DG_INPUTMODE: - td->inputmode = field->report->id; - td->inputmode_index = 0; /* has to be updated below */ - - for (i=0; i < field->maxusage; i++) { - if (field->usage[i].hid == usage->hid) { - td->inputmode_index = i; - break; - } + /* Ignore if value index is out of bounds. */ + if (usage->usage_index >= field->report_count) { + dev_err(&hdev->dev, "HID_DG_INPUTMODE out of range\n"); + break; } + td->inputmode = field->report->id; + td->inputmode_index = usage->usage_index; + break; case HID_DG_CONTACTMAX: td->maxcontact_report_id = field->report->id; @@ -546,6 +544,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, mt_store_field(usage, td, hi); return 1; case HID_DG_CONTACTCOUNT: + /* Ignore if indexes are out of bounds. */ + if (field->index >= field->report->maxfield || + usage->usage_index >= field->report_count) + return 1; td->cc_index = field->index; td->cc_value_index = usage->usage_index; return 1; From b73eb9e6927df1072e7420dedec4fb917d505fa5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:44 +0200 Subject: [PATCH 23/82] HID: sensor-hub: validate feature report details A HID device could send a malicious feature report that would cause the sensor-hub HID driver to read past the end of heap allocation, leaking kernel memory contents to the caller. CVE-2013-2898 Signed-off-by: Kees Cook Cc: stable@kernel.org Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index ca7498107327..aa34755ca205 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -221,7 +221,8 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, mutex_lock(&data->mutex); report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT); - if (!report || (field_index >= report->maxfield)) { + if (!report || (field_index >= report->maxfield) || + report->field[field_index]->report_count < 1) { ret = -EINVAL; goto done_proc; } From 5e1624f591cc2acbc02d88ab6f1008a57cfb129f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:31:52 +0200 Subject: [PATCH 24/82] HID: picolcd_core: validate output report details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A HID device could send a malicious output report that would cause the picolcd HID driver to trigger a NULL dereference during attr file writing. [jkosina@suse.cz: changed report->maxfield < 1 to report->maxfield != 1 as suggested by Bruno]. CVE-2013-2899 Signed-off-by: Kees Cook Cc: stable@kernel.org Reviewed-by: Bruno Prémont Acked-by: Bruno Prémont Signed-off-by: Jiri Kosina --- drivers/hid/hid-picolcd_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c index b48092d0e139..acbb021065ec 100644 --- a/drivers/hid/hid-picolcd_core.c +++ b/drivers/hid/hid-picolcd_core.c @@ -290,7 +290,7 @@ static ssize_t picolcd_operation_mode_store(struct device *dev, buf += 10; cnt -= 10; } - if (!report) + if (!report || report->maxfield != 1) return -EINVAL; while (cnt > 0 && (buf[cnt-1] == '\n' || buf[cnt-1] == '\r')) From 58c62d4474fbeda42851553a546e5cb860a036ea Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 Aug 2013 22:32:01 +0200 Subject: [PATCH 25/82] HID: check for NULL field when setting values Defensively check that the field to be worked on is not NULL. Signed-off-by: Kees Cook Cc: stable@kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index eadcc85e5a62..5b3e4cf7ca6b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1212,7 +1212,12 @@ EXPORT_SYMBOL_GPL(hid_output_report); int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { - unsigned size = field->report_size; + unsigned size; + + if (!field) + return -1; + + size = field->report_size; hid_dump_input(field->report->device, field->usage + offset, value); From e947bc2827cf608bd42ec9480e759766e5dfe09c Mon Sep 17 00:00:00 2001 From: Jonathan Hamilton Date: Thu, 17 Jul 2014 15:54:44 -0700 Subject: [PATCH 26/82] video: adf: Cleanup sw_sync timeline at adf_device_destroy If a sw_sync timeline was created by ADF (for drivers that do not implement ops->complete_fence) we should clean it up when the ADF device is destroyed. Change-Id: Idd90180fcae56a87111f7d12bdd80190756a6b80 Signed-off-by: Jonathan Hamilton --- drivers/video/adf/adf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/adf/adf.c b/drivers/video/adf/adf.c index 231881c2b355..42c30c05826a 100644 --- a/drivers/video/adf/adf.c +++ b/drivers/video/adf/adf.c @@ -613,6 +613,10 @@ void adf_device_destroy(struct adf_device *dev) } mutex_destroy(&dev->post_lock); mutex_destroy(&dev->client_lock); + + if (dev->timeline) + sync_timeline_destroy(&dev->timeline->obj); + adf_obj_destroy(&dev->base, &adf_devices); } EXPORT_SYMBOL(adf_device_destroy); From 30dc17b47c2f5b12279f1db8935ae2e6d35af8ea Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 4 Feb 2014 02:15:32 +0000 Subject: [PATCH 27/82] security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64 Binaries compiled for arm may run on arm64 if CONFIG_COMPAT is selected. Set LSM_MMAP_MIN_ADDR to 32768 if ARM64 && COMPAT to prevent selinux failures launching 32-bit static executables that are mapped at 0x8000. Signed-off-by: Colin Cross Acked-by: Will Deacon Acked-by: Eric Paris Acked-by: James Morris Signed-off-by: Catalin Marinas (cherry picked from upstream 3.14 commit 530b099dfe8499d639e7fbcad28c4199e2a720c7) Change-Id: I05d092d3539380e08e7daf0b9d2faae76147b72b Signed-off-by: Dan Willemsen Reviewed-on: http://git-master/r/367837 Signed-off-by: John Stultz --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index e9c6ac724fef..beb86b500adf 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -103,7 +103,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX - default 32768 if ARM + default 32768 if ARM || (ARM64 && COMPAT) default 65536 help This is the portion of low virtual memory which should be protected From 5f4a7e5e43293f27e0b5ff935b32ba303c89de59 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Tue, 1 Jul 2014 18:17:20 +0800 Subject: [PATCH 28/82] usb: gadget: f_audio_source: change max ISO packet size Re-applying from https://gitorious.org/shr/linux/commit/eb4c9d2db894c3492c0a848581bd4f6790f93d5f Most USB-AUDIO devices are limited to 256 byte for max iso buffer size. If a IN_EP_MAX_PACKET_SIZE is bigger than a USB-AUDIO device's max iso buffer size, it will cause noise. This patch will prevent this case as possibe by reducing packet size. When using 44.1khz, 2ch, 16bit audio data, if max packet size is bigger than 176 bytes, it's no problem. Credits to: Iliyan Malchev Change-Id: Ic2a1c19ea65d5fb42bf12926b51b255b465d7215 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_audio_source.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 56dcf217cfe5..65760c42d422 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -24,7 +24,7 @@ #define SAMPLE_RATE 44100 #define FRAMES_PER_MSEC (SAMPLE_RATE / 1000) -#define IN_EP_MAX_PACKET_SIZE 384 +#define IN_EP_MAX_PACKET_SIZE 256 /* Number of requests to allocate */ #define IN_EP_REQ_COUNT 4 From dc0cf1216b3db4617610ecee67185de0dade9552 Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Mon, 23 Jun 2014 19:14:01 +0800 Subject: [PATCH 29/82] usb: gadget: f_audio_source: Fixed USB Audio Class Interface Descriptor Fixed Android Issue #56549. When both Vendor Class and Audio Class are activated for AOA 2.0, the baInterfaceNr of the AudioControl Interface Descriptor points to wrong interface numbers. They should be pointing to Audio Control Device and Audio Streaming interfaces. Replaced baInterfaceNr with the correct value. Change-Id: Iaa083f3d97c1f0fc9481bf87852b2b51278a6351 Signed-off-by: Anson Jacob --- drivers/usb/gadget/f_audio_source.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/f_audio_source.c b/drivers/usb/gadget/f_audio_source.c index 65760c42d422..21ced13c83d8 100644 --- a/drivers/usb/gadget/f_audio_source.c +++ b/drivers/usb/gadget/f_audio_source.c @@ -580,12 +580,18 @@ audio_bind(struct usb_configuration *c, struct usb_function *f) goto fail; ac_interface_desc.bInterfaceNumber = status; + /* AUDIO_AC_INTERFACE */ + ac_header_desc.baInterfaceNr[0] = status; + status = usb_interface_id(c, f); if (status < 0) goto fail; as_interface_alt_0_desc.bInterfaceNumber = status; as_interface_alt_1_desc.bInterfaceNumber = status; + /* AUDIO_AS_INTERFACE */ + ac_header_desc.baInterfaceNr[1] = status; + status = -ENODEV; /* allocate our endpoint */ From 687f999e1fbd3b553bccbd7f52996ae56c5e327e Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 23 Jul 2014 16:55:07 -0700 Subject: [PATCH 30/82] ext4: Add support for FIDTRIM, a best-effort ioctl for deep discard trim * What This provides an interface for issuing an FITRIM which uses the secure discard instead of just a discard. Only the eMMC command is "secure", and not how the FS uses it: due to the fact that the FS might reassign a region somewhere else, the original deleted data will not be affected by the "trim" which only handles un-used regions. So we'll just call it "deep discard", and note that this is a "best effort" cleanup. * Why Once in a while, We want to be able to cleanup most of the unused blocks after erasing a bunch of files. We don't want to constantly secure-discard via a mount option. From an eMMC spec perspective, it tells the device to really get rid of all the data for the specified blocks and not just put them back into the pool of free ones (unlike the normal TRIM). The eMMC spec says the secure trim handling must make sure the data (and metadata) is not available anymore. A simple TRIM doesn't clear the data, it just puts blocks in the free pool. JEDEC Standard No. 84-A441 7.6.9 Secure Erase 7.6.10 Secure Trim From an FS perspective, it is acceptable to leave some data behind. - directory entries related to deleted files - databases entries related to deleted files - small-file data stored in inode extents - blocks held by the FS waiting to be re-used (mitigated by sync). - blocks reassigned by the FS prior to FIDTRIM. Change-Id: I676a1404a80130d93930c84898360f2e6fb2f81e Signed-off-by: Geremy Condra Signed-off-by: JP Abgrall --- fs/ext4/ext4.h | 3 ++- fs/ext4/ioctl.c | 6 +++++- fs/ext4/mballoc.c | 28 ++++++++++++++++++---------- include/uapi/linux/fs.h | 2 ++ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..09ace38b451a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2050,7 +2050,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); -extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); +extern int ext4_trim_fs(struct super_block *, struct fstrim_range *, + unsigned long blkdev_flags); /* inode.c */ struct buffer_head *ext4_getblk(handle_t *, struct inode *, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0590f7..d01a05593be5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -594,11 +594,13 @@ resizefs_out: return err; } + case FIDTRIM: case FITRIM: { struct request_queue *q = bdev_get_queue(sb->s_bdev); struct fstrim_range range; int ret = 0; + int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -606,13 +608,15 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q)) + return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; range.minlen = max((unsigned int)range.minlen, q->limits.discard_granularity); - ret = ext4_trim_fs(sb, &range); + ret = ext4_trim_fs(sb, &range, flags); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..50375b0f6022 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2705,7 +2705,8 @@ int ext4_mb_release(struct super_block *sb) } static inline int ext4_issue_discard(struct super_block *sb, - ext4_group_t block_group, ext4_grpblk_t cluster, int count) + ext4_group_t block_group, ext4_grpblk_t cluster, int count, + unsigned long flags) { ext4_fsblk_t discard_block; @@ -2714,7 +2715,7 @@ static inline int ext4_issue_discard(struct super_block *sb, count = EXT4_C2B(EXT4_SB(sb), count); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); + return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags); } /* @@ -2736,7 +2737,7 @@ static void ext4_free_data_callback(struct super_block *sb, if (test_opt(sb, DISCARD)) { err = ext4_issue_discard(sb, entry->efd_group, entry->efd_start_cluster, - entry->efd_count); + entry->efd_count, 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%d failed" @@ -4755,7 +4756,8 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count); + err = ext4_issue_discard(sb, block_group, bit, count, + 0); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" @@ -4950,13 +4952,15 @@ error_return: * @count: number of blocks to TRIM * @group: alloc. group we are working with * @e4b: ext4 buddy for the group + * @blkdev_flags: flags for the block device * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) + ext4_group_t group, struct ext4_buddy *e4b, + unsigned long blkdev_flags) { struct ext4_free_extent ex; int ret = 0; @@ -4975,7 +4979,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, */ mb_mark_used(e4b, &ex); ext4_unlock_group(sb, group); - ret = ext4_issue_discard(sb, group, start, count); + ret = ext4_issue_discard(sb, group, start, count, blkdev_flags); ext4_lock_group(sb, group); mb_free_blocks(NULL, e4b, start, ex.fe_len); return ret; @@ -4988,6 +4992,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @blkdev_flags: flags for the block device * * ext4_trim_all_free walks through group's buddy bitmap searching for free * extents. When the free block is found, ext4_trim_extent is called to TRIM @@ -5002,7 +5007,7 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, unsigned long blkdev_flags) { void *bitmap; ext4_grpblk_t next, count = 0, free_count = 0; @@ -5035,7 +5040,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if ((next - start) >= minblocks) { ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); + next - start, group, &e4b, + blkdev_flags); if (ret && ret != -EOPNOTSUPP) break; ret = 0; @@ -5077,6 +5083,7 @@ out: * ext4_trim_fs() -- trim ioctl handle function * @sb: superblock for filesystem * @range: fstrim_range structure + * @blkdev_flags: flags for the block device * * start: First Byte to trim * len: number of Bytes to trim from start @@ -5085,7 +5092,8 @@ out: * start to start+len. For each such a group ext4_trim_all_free function * is invoked to trim all free space. */ -int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) +int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range, + unsigned long blkdev_flags) { struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; @@ -5141,7 +5149,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, blkdev_flags); if (cnt < 0) { ret = cnt; break; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..5014a5c472ed 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -154,6 +154,8 @@ struct inodes_stat_t { #define FITHAW _IOWR('X', 120, int) /* Thaw */ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FIDTRIM _IOWR('f', 128, struct fstrim_range) /* Deep discard trim */ + #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) From 0beec5004ce4325838b327f45ddf716706890e75 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 24 Jan 2014 15:19:49 -0800 Subject: [PATCH 31/82] staging: android: ashmem: Avoid deadlock with mmap/shrink Both ashmem_mmap and ashmem_shrink take the ashmem_lock. It may be possible for ashmem_mmap to invoke ashmem_shrink: -000|mutex_lock(lock = 0x0) -001|ashmem_shrink(?, sc = 0x0) <--- try to take ashmem_mutex again -002|shrink_slab(shrink = 0xDA5F1CC0, nr_pages_scanned = 0, lru_pages -002|= -002|124) -003|try_to_free_pages(zonelist = 0x0, ?, ?, ?) -004|__alloc_pages_nodemask(gfp_mask = 21200, order = 1, zonelist = -004|0xC11D0940, -005|new_slab(s = 0xE4841E80, ?, node = -1) -006|__slab_alloc.isra.43.constprop.50(s = 0xE4841E80, gfpflags = -006|2148925462, ad -007|kmem_cache_alloc(s = 0xE4841E80, gfpflags = 208) -008|shmem_alloc_inode(?) -009|alloc_inode(sb = 0xE480E800) -010|new_inode_pseudo(?) -011|new_inode(?) -012|shmem_get_inode(sb = 0xE480E800, dir = 0x0, ?, dev = 0, flags = -012|187) -013|shmem_file_setup(?, ?, flags = 187) -014|ashmem_mmap(?, vma = 0xC5D64210) <---- Acquire ashmem_mutex -015|mmap_region(file = 0xDF8E2C00, addr = 1772974080, len = 233472, -015|flags = 57, -016|sys_mmap_pgoff(addr = 0, len = 230400, prot = 3, flags = 1, fd = -016|157, pgoff -017|ret_fast_syscall(asm) -->|exception -018|NUR:0x40097508(asm) ---|end of frame Avoid this deadlock by using mutex_trylock in ashmem_shrink; if the mutex is already held, do not attempt to shrink. Change-Id: I222bbf55856d5849da813b730de0636c80966c8e Reported-by: Matt Wagantall Reported-by: Syed Rameez Mustafa Reported-by: Osvaldo Banuelos Reported-by: Subbaraman Narayanamurthy Signed-off-by: Laura Abbott --- drivers/staging/android/ashmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 3511b0840362..ccaef8b48eba 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -363,7 +363,9 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc) if (!sc->nr_to_scan) return lru_count; - mutex_lock(&ashmem_mutex); + if (!mutex_trylock(&ashmem_mutex)) + return -1; + list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { loff_t start = range->pgstart * PAGE_SIZE; loff_t end = (range->pgend + 1) * PAGE_SIZE; From 28cb7116eebfa0e4b36b154df0f1c6a251f3b97b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 22 May 2013 18:28:38 +0200 Subject: [PATCH 32/82] firmware: Avoid deadlock of usermodehelper lock at shutdown When a system goes to reboot/shutdown, it tries to disable the usermode helper via usermodehelper_disable(). This might be blocked when a driver tries to load a firmware beforehand and it's stuck by some reason. For example, dell_rbu driver loads the firmware in non-hotplug mode and waits for user-space clearing the loading sysfs flag. If user-space doesn't clear the flag, it waits forever, thus blocks the reboot, too. As a workaround, in this patch, the firmware class driver registers a reboot notifier so that it can abort all pending f/w bufs before issuing usermodehelper_disable(). Signed-off-by: Takashi Iwai Acked-by: Ming Lei Signed-off-by: Greg Kroah-Hartman Conflicts: drivers/base/firmware_class.c Change-Id: I7ff6c198cd34090e55845b9d4035b1e5dc86226b --- drivers/base/firmware_class.c | 43 ++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 01e21037d8fe..55d682e6ecea 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -130,6 +131,7 @@ struct firmware_buf { struct page **pages; int nr_pages; int page_array_size; + struct list_head pending_list; #endif char fw_id[]; }; @@ -171,6 +173,9 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, strcpy(buf->fw_id, fw_name); buf->fwc = fwc; init_completion(&buf->completion); +#ifdef CONFIG_FW_LOADER_USER_HELPER + INIT_LIST_HEAD(&buf->pending_list); +#endif pr_debug("%s: fw-%s buf=%p\n", __func__, fw_name, buf); @@ -446,10 +451,8 @@ static struct firmware_priv *to_firmware_priv(struct device *dev) return container_of(dev, struct firmware_priv, dev); } -static void fw_load_abort(struct firmware_priv *fw_priv) +static void __fw_load_abort(struct firmware_buf *buf) { - struct firmware_buf *buf = fw_priv->buf; - /* * There is a small window in which user can write to 'loading' * between loading done and disappearance of 'loading' @@ -457,8 +460,16 @@ static void fw_load_abort(struct firmware_priv *fw_priv) if (test_bit(FW_STATUS_DONE, &buf->status)) return; + list_del_init(&buf->pending_list); set_bit(FW_STATUS_ABORT, &buf->status); complete_all(&buf->completion); +} + +static void fw_load_abort(struct firmware_priv *fw_priv) +{ + struct firmware_buf *buf = fw_priv->buf; + + __fw_load_abort(buf); /* avoid user action after loading abort */ fw_priv->buf = NULL; @@ -467,6 +478,25 @@ static void fw_load_abort(struct firmware_priv *fw_priv) #define is_fw_load_aborted(buf) \ test_bit(FW_STATUS_ABORT, &(buf)->status) +static LIST_HEAD(pending_fw_head); + +/* reboot notifier for avoid deadlock with usermode_lock */ +static int fw_shutdown_notify(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + mutex_lock(&fw_lock); + while (!list_empty(&pending_fw_head)) + __fw_load_abort(list_first_entry(&pending_fw_head, + struct firmware_buf, + pending_list)); + mutex_unlock(&fw_lock); + return NOTIFY_DONE; +} + +static struct notifier_block fw_shutdown_nb = { + .notifier_call = fw_shutdown_notify, +}; + static ssize_t firmware_timeout_show(struct class *class, struct class_attribute *attr, char *buf) @@ -619,6 +649,7 @@ static ssize_t firmware_loading_store(struct device *dev, * is completed. * */ fw_map_pages_buf(fw_buf); + list_del_init(&fw_buf->pending_list); complete_all(&fw_buf->completion); break; } @@ -868,6 +899,10 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD); } + mutex_lock(&fw_lock); + list_add(&buf->pending_list, &pending_fw_head); + mutex_unlock(&fw_lock); + wait_for_completion(&buf->completion); cancel_delayed_work_sync(&fw_priv->timeout_work); @@ -1526,6 +1561,7 @@ static int __init firmware_class_init(void) { fw_cache_init(); #ifdef CONFIG_FW_LOADER_USER_HELPER + register_reboot_notifier(&fw_shutdown_nb); return class_register(&firmware_class); #else return 0; @@ -1539,6 +1575,7 @@ static void __exit firmware_class_exit(void) unregister_pm_notifier(&fw_cache.pm_notify); #endif #ifdef CONFIG_FW_LOADER_USER_HELPER + unregister_reboot_notifier(&fw_shutdown_nb); class_unregister(&firmware_class); #endif } From df77f00e9623348a89970c00bdb0b3d72789bfa8 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 29 Aug 2013 20:28:13 +0200 Subject: [PATCH 33/82] firmware loader: fix pending_fw_head list corruption Got the following oops just before reboot: Unable to handle kernel NULL pointer dereference at virtual address 00000000 [<8028d300>] (__list_del_entry+0x44/0xac) [<802e3320>] (__fw_load_abort.part.13+0x1c/0x50) [<802e337c>] (fw_shutdown_notify+0x28/0x50) [<80034f80>] (notifier_call_chain.isra.1+0x5c/0x9c) [<800350ec>] (__blocking_notifier_call_chain+0x44/0x58) [<80035114>] (blocking_notifier_call_chain+0x14/0x18) [<80035d64>] (kernel_restart_prepare+0x14/0x38) [<80035d94>] (kernel_restart+0xc/0x50) The following race condition triggers here: _request_firmware_load() device_create_file(...) kobject_uevent(...) (schedule) (resume) firmware_loading_store(1) firmware_loading_store(0) list_del_init(&buf->pending_list) (schedule) (resume) list_add(&buf->pending_list, &pending_fw_head); wait_for_completion(&buf->completion); causing an oops later when walking pending_list after the firmware has been released. The proposed fix is to move the list_add() before sysfs attribute creation. Signed-off-by: Maxime Bizon Acked-by: Ming Lei Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 55d682e6ecea..d7872b96019d 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -884,8 +884,15 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, goto err_del_dev; } + mutex_lock(&fw_lock); + list_add(&buf->pending_list, &pending_fw_head); + mutex_unlock(&fw_lock); + retval = device_create_file(f_dev, &dev_attr_loading); if (retval) { + mutex_lock(&fw_lock); + list_del_init(&buf->pending_list); + mutex_unlock(&fw_lock); dev_err(f_dev, "%s: device_create_file failed\n", __func__); goto err_del_bin_attr; } @@ -899,10 +906,6 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, bool uevent, kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD); } - mutex_lock(&fw_lock); - list_add(&buf->pending_list, &pending_fw_head); - mutex_unlock(&fw_lock); - wait_for_completion(&buf->completion); cancel_delayed_work_sync(&fw_priv->timeout_work); From c3b04f9d63bd8c1366cbe971b38759fa9938830b Mon Sep 17 00:00:00 2001 From: xerox_lin Date: Mon, 18 Aug 2014 21:54:23 +0800 Subject: [PATCH 34/82] USB: rndis: Free the rndis response queue during REMOTE_NDIS_RESET_MSG When rndis data transfer is in progress, some Windows7 Host PC is not sending the GET_ENCAPSULATED_RESPONSE command for receiving the response for the previous SEND_ENCAPSULATED_COMMAND processed. The rndis function driver appends each response for the SEND_ENCAPSULATED_COMMAND in a queue. As the above process got corrupted, the Host sends a REMOTE_NDIS_RESET_MSG command to do a soft-reset. As the rndis response queue is not freed, the previous response is sent as a part of this REMOTE_NDIS_RESET_MSG's reset response and the Host blocks any more Rndis transfers. Hence free the rndis response queue as a part of this soft-reset so that the current response for REMOTE_NDIS_RESET_MSG is sent properly during the response command. Change-Id: I8eff3849db452fe01b7d1fe4140ef1f1ad3f4fd4 Signed-off-by: Rajkumar Raghupathy Signed-off-by: Xerox Lin --- drivers/usb/gadget/rndis.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index 693f0c24d516..5378dc6574fb 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -686,6 +686,12 @@ static int rndis_reset_response(int configNr, rndis_reset_msg_type *buf) rndis_reset_cmplt_type *resp; rndis_resp_t *r; struct rndis_params *params = rndis_per_dev_params + configNr; + u32 length; + u8 *xbuf; + + /* drain the response queue */ + while ((xbuf = rndis_get_next_response(configNr, &length))) + rndis_free_response(configNr, xbuf); r = rndis_add_response(configNr, sizeof(rndis_reset_cmplt_type)); if (!r) From 9bc0c15675840178cee1486c2a7f25faead1518e Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 5 Aug 2014 12:05:17 -0700 Subject: [PATCH 35/82] mm: fix prctl_set_vma_anon_name prctl_set_vma_anon_name could attempt to set the name across two vmas at the same time due to a typo, which might corrupt the vma list. Fix it to use tmp instead of end to limit the name setting to a single vma at a time. Change-Id: Ie32d8ddb0fd547efbeedd6528acdab5ca5b308b4 Reported-by: Jed Davis Signed-off-by: Colin Cross --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sys.c b/kernel/sys.c index ab7fda5fbe18..65d3e55bd282 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2187,7 +2187,7 @@ static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, tmp = end; /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = prctl_update_vma_anon_name(vma, &prev, start, end, + error = prctl_update_vma_anon_name(vma, &prev, start, tmp, (const char __user *)arg); if (error) return error; From a3d5a3f5affd45d9aa21daf04d4eb543fd3ed444 Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Fri, 22 Aug 2014 14:40:18 -0700 Subject: [PATCH 36/82] Add flags parameter to get_country_code template Change-Id: Ic3f173db144a301ea104f544fc8ec723241c1d59 Signed-off-by: Dmitry Shmidt --- include/linux/wlan_plat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/wlan_plat.h b/include/linux/wlan_plat.h index 40ec3482d1ef..8ad2dbd0c296 100644 --- a/include/linux/wlan_plat.h +++ b/include/linux/wlan_plat.h @@ -15,13 +15,15 @@ #ifndef _LINUX_WLAN_PLAT_H_ #define _LINUX_WLAN_PLAT_H_ +#define WLAN_PLAT_NODFS_FLAG 0x01 + struct wifi_platform_data { int (*set_power)(int val); int (*set_reset)(int val); int (*set_carddetect)(int val); void *(*mem_prealloc)(int section, unsigned long size); int (*get_mac_addr)(unsigned char *buf); - void *(*get_country_code)(char *ccode); + void *(*get_country_code)(char *ccode, u32 flags); }; #endif From a327898fa354ae18e9fe07d3e203304f4ef5c0d2 Mon Sep 17 00:00:00 2001 From: xerox_lin Date: Thu, 14 Aug 2014 14:48:44 +0800 Subject: [PATCH 37/82] usb: Add support for rndis uplink aggregation RNDIS protocol supports data aggregation on uplink and can help reduce mips by reducing number of interrupts on device. Throughput also improved by 20-30%. Aggregation is disabled by setting aggregation packet size to 1. To help better UL throughput, set as ul aggregation support to 3 rndis packets by default. It can be configured via module parameter: rndis_ul_max_pkt_per_xfer. Change-Id: I0b62a21a5c3ceb6b04933d0d6da33301dbafe493 Signed-off-by: Vamsi Krishna Signed-off-by: Xerox Lin --- drivers/usb/gadget/f_rndis.c | 13 ++++- drivers/usb/gadget/rndis.c | 105 +++++++++++++++++++++++++++++------ drivers/usb/gadget/rndis.h | 2 + drivers/usb/gadget/u_ether.c | 6 ++ drivers/usb/gadget/u_ether.h | 4 ++ 5 files changed, 111 insertions(+), 19 deletions(-) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 21c5ee2482d6..7646a564bfda 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -25,7 +25,6 @@ #include "u_ether.h" #include "rndis.h" - /* * This function is an RNDIS Ethernet port -- a Microsoft protocol that's * been promoted instead of the standard CDC Ethernet. The published RNDIS @@ -67,6 +66,16 @@ * - MS-Windows drivers sometimes emit undocumented requests. */ +static bool rndis_multipacket_dl_disable; +module_param(rndis_multipacket_dl_disable, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(rndis_multipacket_dl_disable, + "Disable RNDIS Multi-packet support in DownLink"); + +static unsigned int rndis_ul_max_pkt_per_xfer = 3; +module_param(rndis_ul_max_pkt_per_xfer, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer, + "Maximum packets per transfer for UL aggregation"); + struct f_rndis { struct gether port; u8 ctrl_id, data_id; @@ -748,6 +757,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f) rndis_set_param_medium(rndis->config, RNDIS_MEDIUM_802_3, 0); rndis_set_host_mac(rndis->config, rndis->ethaddr); + rndis_set_max_pkt_xfer(rndis->config, rndis_ul_max_pkt_per_xfer); if (rndis->manufacturer && rndis->vendorID && rndis_set_param_vendor(rndis->config, rndis->vendorID, @@ -854,6 +864,7 @@ rndis_bind_config_vendor(struct usb_configuration *c, u8 ethaddr[ETH_ALEN], rndis->port.header_len = sizeof(struct rndis_packet_msg_type); rndis->port.wrap = rndis_add_header; rndis->port.unwrap = rndis_rm_hdr; + rndis->port.ul_max_pkts_per_xfer = rndis_ul_max_pkt_per_xfer; rndis->port.func.name = "rndis"; rndis->port.func.strings = rndis_strings; diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index 5378dc6574fb..cb2767df3fba 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -59,6 +59,16 @@ MODULE_PARM_DESC (rndis_debug, "enable debugging"); #define RNDIS_MAX_CONFIGS 1 +int rndis_ul_max_pkt_per_xfer_rcvd; +module_param(rndis_ul_max_pkt_per_xfer_rcvd, int, S_IRUGO); +MODULE_PARM_DESC(rndis_ul_max_pkt_per_xfer_rcvd, + "Max num of REMOTE_NDIS_PACKET_MSGs received in a single transfer"); + +int rndis_ul_max_xfer_size_rcvd; +module_param(rndis_ul_max_xfer_size_rcvd, int, S_IRUGO); +MODULE_PARM_DESC(rndis_ul_max_xfer_size_rcvd, + "Max size of bus transfer received"); + static rndis_params rndis_per_dev_params[RNDIS_MAX_CONFIGS]; @@ -585,12 +595,12 @@ static int rndis_init_response(int configNr, rndis_init_msg_type *buf) resp->MinorVersion = cpu_to_le32(RNDIS_MINOR_VERSION); resp->DeviceFlags = cpu_to_le32(RNDIS_DF_CONNECTIONLESS); resp->Medium = cpu_to_le32(RNDIS_MEDIUM_802_3); - resp->MaxPacketsPerTransfer = cpu_to_le32(1); - resp->MaxTransferSize = cpu_to_le32( - params->dev->mtu + resp->MaxPacketsPerTransfer = cpu_to_le32(params->max_pkt_per_xfer); + resp->MaxTransferSize = cpu_to_le32(params->max_pkt_per_xfer * + (params->dev->mtu + sizeof(struct ethhdr) + sizeof(struct rndis_packet_msg_type) - + 22); + + 22)); resp->PacketAlignmentFactor = cpu_to_le32(0); resp->AFListOffset = cpu_to_le32(0); resp->AFListSize = cpu_to_le32(0); @@ -916,6 +926,8 @@ int rndis_set_param_dev(u8 configNr, struct net_device *dev, u16 *cdc_filter) rndis_per_dev_params[configNr].dev = dev; rndis_per_dev_params[configNr].filter = cdc_filter; + rndis_ul_max_xfer_size_rcvd = 0; + rndis_ul_max_pkt_per_xfer_rcvd = 0; return 0; } @@ -942,6 +954,13 @@ int rndis_set_param_medium(u8 configNr, u32 medium, u32 speed) return 0; } +void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer) +{ + pr_debug("%s:\n", __func__); + + rndis_per_dev_params[configNr].max_pkt_per_xfer = max_pkt_per_xfer; +} + void rndis_add_hdr(struct sk_buff *skb) { struct rndis_packet_msg_type *header; @@ -1014,23 +1033,73 @@ int rndis_rm_hdr(struct gether *port, struct sk_buff *skb, struct sk_buff_head *list) { - /* tmp points to a struct rndis_packet_msg_type */ - __le32 *tmp = (void *)skb->data; + int num_pkts = 1; - /* MessageType, MessageLength */ - if (cpu_to_le32(RNDIS_MSG_PACKET) - != get_unaligned(tmp++)) { - dev_kfree_skb_any(skb); - return -EINVAL; - } - tmp++; + if (skb->len > rndis_ul_max_xfer_size_rcvd) + rndis_ul_max_xfer_size_rcvd = skb->len; - /* DataOffset, DataLength */ - if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) { - dev_kfree_skb_any(skb); - return -EOVERFLOW; + while (skb->len) { + struct rndis_packet_msg_type *hdr; + struct sk_buff *skb2; + u32 msg_len, data_offset, data_len; + + /* some rndis hosts send extra byte to avoid zlp, ignore it */ + if (skb->len == 1) { + dev_kfree_skb_any(skb); + return 0; + } + + if (skb->len < sizeof *hdr) { + pr_err("invalid rndis pkt: skblen:%u hdr_len:%u", + skb->len, sizeof *hdr); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + hdr = (void *)skb->data; + msg_len = le32_to_cpu(hdr->MessageLength); + data_offset = le32_to_cpu(hdr->DataOffset); + data_len = le32_to_cpu(hdr->DataLength); + + if (skb->len < msg_len || + ((data_offset + data_len + 8) > msg_len)) { + pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n", + le32_to_cpu(hdr->MessageType), + msg_len, data_offset, data_len, skb->len); + dev_kfree_skb_any(skb); + return -EOVERFLOW; + } + if (le32_to_cpu(hdr->MessageType) != RNDIS_MSG_PACKET) { + pr_err("invalid rndis message: %d/%d/%d/%d, len:%d\n", + le32_to_cpu(hdr->MessageType), + msg_len, data_offset, data_len, skb->len); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + skb_pull(skb, data_offset + 8); + + if (msg_len == skb->len) { + skb_trim(skb, data_len); + break; + } + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) { + pr_err("%s:skb clone failed\n", __func__); + dev_kfree_skb_any(skb); + return -ENOMEM; + } + + skb_pull(skb, msg_len - sizeof *hdr); + skb_trim(skb2, data_len); + skb_queue_tail(list, skb2); + + num_pkts++; } - skb_trim(skb, get_unaligned_le32(tmp++)); + + if (num_pkts > rndis_ul_max_pkt_per_xfer_rcvd) + rndis_ul_max_pkt_per_xfer_rcvd = num_pkts; skb_queue_tail(list, skb); return 0; diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h index 0647f2f34e89..12045b31a311 100644 --- a/drivers/usb/gadget/rndis.h +++ b/drivers/usb/gadget/rndis.h @@ -189,6 +189,7 @@ typedef struct rndis_params struct net_device *dev; u32 vendorID; + u8 max_pkt_per_xfer; const char *vendorDescr; void (*resp_avail)(void *v); void *v; @@ -204,6 +205,7 @@ int rndis_set_param_dev (u8 configNr, struct net_device *dev, int rndis_set_param_vendor (u8 configNr, u32 vendorID, const char *vendorDescr); int rndis_set_param_medium (u8 configNr, u32 medium, u32 speed); +void rndis_set_max_pkt_xfer(u8 configNr, u8 max_pkt_per_xfer); void rndis_add_hdr (struct sk_buff *skb); int rndis_rm_hdr(struct gether *port, struct sk_buff *skb, struct sk_buff_head *list); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 4b76124ce96b..801e326e54c0 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -64,6 +64,7 @@ struct eth_dev { struct sk_buff_head rx_frames; unsigned header_len; + unsigned ul_max_pkts_per_xfer; struct sk_buff *(*wrap)(struct gether *, struct sk_buff *skb); int (*unwrap)(struct gether *, struct sk_buff *skb, @@ -226,9 +227,13 @@ rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags) size += out->maxpacket - 1; size -= size % out->maxpacket; + if (dev->ul_max_pkts_per_xfer) + size *= dev->ul_max_pkts_per_xfer; + if (dev->port_usb->is_fixed) size = max_t(size_t, size, dev->port_usb->fixed_out_len); + DBG(dev, "%s: size: %d\n", __func__, size); skb = alloc_skb(size + NET_IP_ALIGN, gfp_flags); if (skb == NULL) { DBG(dev, "no rx skb\n"); @@ -882,6 +887,7 @@ struct net_device *gether_connect(struct gether *link) dev->header_len = link->header_len; dev->unwrap = link->unwrap; dev->wrap = link->wrap; + dev->ul_max_pkts_per_xfer = link->ul_max_pkts_per_xfer; spin_lock(&dev->lock); dev->port_usb = link; diff --git a/drivers/usb/gadget/u_ether.h b/drivers/usb/gadget/u_ether.h index 02522338a708..ce803d415887 100644 --- a/drivers/usb/gadget/u_ether.h +++ b/drivers/usb/gadget/u_ether.h @@ -54,6 +54,10 @@ struct gether { bool is_fixed; u32 fixed_out_len; u32 fixed_in_len; + unsigned ul_max_pkts_per_xfer; +/* Max number of SKB packets to be used to create Multi Packet RNDIS */ +#define TX_SKB_HOLD_THRESHOLD 3 + bool multi_pkt_xfer; struct sk_buff *(*wrap)(struct gether *port, struct sk_buff *skb); int (*unwrap)(struct gether *port, From 945a225e77ffa18377e397654867620c71f82998 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 9 Nov 2013 00:51:56 +0100 Subject: [PATCH 38/82] ARM: 7888/1: seccomp: not compatible with ARM OABI Make sure that seccomp filter won't be built when ARM OABI is in use, since there is work needed to distinguish calling conventions. Until that is done (which is likely never since OABI is deprecated), make sure seccomp filter is unavailable in the OABI world. Signed-off-by: Kees Cook Reviewed-by: Will Drewry Signed-off-by: Russell King --- arch/arm/Kconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 99887aaa04bb..0dde9b9e91c2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -22,7 +22,7 @@ config ARM select HAVE_AOUT select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB - select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -1681,6 +1681,11 @@ config OABI_COMPAT in memory differs between the legacy ABI and the new ARM EABI (only for non "thumb" binaries). This option adds a tiny overhead to all syscalls and produces a slightly larger kernel. + + The seccomp filter system will not be available when this is + selected, since there is no way yet to sensibly distinguish + between calling conventions during filtering. + If you know you'll be using only pure EABI user space then you can say N here. If this option is not selected and you attempt to execute a legacy ABI binary then the result will be From c2da3eba6ac4b2df16d4aa13ce0f9bd360000baa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2014 10:54:34 -0700 Subject: [PATCH 39/82] seccomp: fix memory leak on filter attach This sets the correct error code when final filter memory is unavailable, and frees the raw filter no matter what. unreferenced object 0xffff8800d6ea4000 (size 512): comm "sshd", pid 278, jiffies 4294898315 (age 46.653s) hex dump (first 32 bytes): 21 00 00 00 04 00 00 00 15 00 01 00 3e 00 00 c0 !...........>... 06 00 00 00 00 00 00 00 21 00 00 00 00 00 00 00 ........!....... backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc+0x280/0x320 [] prctl_set_seccomp+0x11e/0x3b0 [] SyS_prctl+0x3bb/0x4a0 [] system_call_fastpath+0x1a/0x1f [] 0xffffffffffffffff Reported-by: Masami Ichikawa Signed-off-by: Kees Cook Tested-by: Masami Ichikawa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Conflicts: kernel/seccomp.c Change-Id: Ide3c27bf378397f8faf4218e75c31e4b8bc43c4c --- kernel/seccomp.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index b7a10048a32c..260ff1fce63d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -273,7 +273,23 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) /* Check and rewrite the fprog for seccomp use */ ret = seccomp_check_filter(filter->insns, filter->len); if (ret) - goto fail; + goto free_prog; + + /* Allocate a new seccomp_filter */ + ret = -ENOMEM; + filter = kzalloc(sizeof(struct seccomp_filter) + + sizeof(struct sock_filter_int) * new_len, + GFP_KERNEL|__GFP_NOWARN); + if (!filter) + goto free_prog; + + ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len); + if (ret) + goto free_filter; + kfree(fp); + + atomic_set(&filter->usage, 1); + filter->len = new_len; /* * If there is an existing filter, make it the prev and don't drop its From 1ba2ccbc8cdcb0881dd91212b2f04318884dfe4a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Jul 2014 11:28:33 -0700 Subject: [PATCH 40/82] MAINTAINERS: create seccomp entry Add myself as seccomp maintainer. Suggested-by: James Morris Signed-off-by: Kees Cook --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad7e322ad17b..614483608ba4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7131,6 +7131,16 @@ S: Maintained F: drivers/mmc/host/sdhci.* F: drivers/mmc/host/sdhci-pltfm.[ch] +SECURE COMPUTING +M: Kees Cook +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp +S: Supported +F: kernel/seccomp.c +F: include/uapi/linux/seccomp.h +F: include/linux/seccomp.h +K: \bsecure_computing +K: \bTIF_SECCOMP\b + SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) M: Anton Vorontsov L: linuxppc-dev@lists.ozlabs.org From 6862b0143649033ca56fd460684e632b35a59abc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 May 2014 15:02:11 -0700 Subject: [PATCH 41/82] seccomp: create internal mode-setting function In preparation for having other callers of the seccomp mode setting logic, split the prctl entry point away from the core logic that performs seccomp mode setting. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 260ff1fce63d..bf1cd927707f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -483,7 +483,7 @@ long prctl_get_seccomp(void) } /** - * prctl_set_seccomp: configures current->seccomp.mode + * seccomp_set_mode: internal function for setting seccomp mode * @seccomp_mode: requested mode to use * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER * @@ -496,7 +496,7 @@ long prctl_get_seccomp(void) * * Returns 0 on success or -EINVAL on failure. */ -long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) { long ret = -EINVAL; @@ -527,3 +527,15 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) out: return ret; } + +/** + * prctl_set_seccomp: configures current->seccomp.mode + * @seccomp_mode: requested mode to use + * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER + * + * Returns 0 on success or -EINVAL on failure. + */ +long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) +{ + return seccomp_set_mode(seccomp_mode, filter); +} From c208e4e9f119bc9cd0c444344de6407c0d373a2c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 15:38:02 -0700 Subject: [PATCH 42/82] seccomp: extract check/assign mode helpers To support splitting mode 1 from mode 2, extract the mode checking and assignment logic into common functions. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bf1cd927707f..f627d2cb0815 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -219,7 +219,23 @@ static u32 seccomp_run_filters(int syscall) } return ret; } +#endif /* CONFIG_SECCOMP_FILTER */ +static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) +{ + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) + return false; + + return true; +} + +static inline void seccomp_assign_mode(unsigned long seccomp_mode) +{ + current->seccomp.mode = seccomp_mode; + set_tsk_thread_flag(current, TIF_SECCOMP); +} + +#ifdef CONFIG_SECCOMP_FILTER /** * seccomp_attach_filter: Attaches a seccomp filter to current. * @fprog: BPF program to install @@ -500,8 +516,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) { long ret = -EINVAL; - if (current->seccomp.mode && - current->seccomp.mode != seccomp_mode) + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; switch (seccomp_mode) { @@ -522,8 +537,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) goto out; } - current->seccomp.mode = seccomp_mode; - set_thread_flag(TIF_SECCOMP); + seccomp_assign_mode(seccomp_mode); out: return ret; } From 1a63bcec4fb055acbed31a5cf1cc40e968cf0664 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 15:55:25 -0700 Subject: [PATCH 43/82] seccomp: split mode setting routines Separates the two mode setting paths to make things more readable with fewer #ifdefs within function bodies. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- kernel/seccomp.c | 71 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f627d2cb0815..793bec19b6c7 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -499,48 +499,66 @@ long prctl_get_seccomp(void) } /** - * seccomp_set_mode: internal function for setting seccomp mode - * @seccomp_mode: requested mode to use - * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER - * - * This function may be called repeatedly with a @seccomp_mode of - * SECCOMP_MODE_FILTER to install additional filters. Every filter - * successfully installed will be evaluated (in reverse order) for each system - * call the task makes. + * seccomp_set_mode_strict: internal function for setting strict seccomp * * Once current->seccomp.mode is non-zero, it may not be changed. * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter) +static long seccomp_set_mode_strict(void) { + const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; long ret = -EINVAL; if (!seccomp_may_assign_mode(seccomp_mode)) goto out; - switch (seccomp_mode) { - case SECCOMP_MODE_STRICT: - ret = 0; #ifdef TIF_NOTSC - disable_TSC(); + disable_TSC(); #endif - break; + seccomp_assign_mode(seccomp_mode); + ret = 0; + +out: + + return ret; +} + #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: - ret = seccomp_attach_user_filter(filter); - if (ret) - goto out; - break; -#endif - default: +/** + * seccomp_set_mode_filter: internal function for setting seccomp filter + * @filter: struct sock_fprog containing filter + * + * This function may be called repeatedly to install additional filters. + * Every filter successfully installed will be evaluated (in reverse order) + * for each system call the task makes. + * + * Once current->seccomp.mode is non-zero, it may not be changed. + * + * Returns 0 on success or -EINVAL on failure. + */ +static long seccomp_set_mode_filter(char __user *filter) +{ + const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + long ret = -EINVAL; + + if (!seccomp_may_assign_mode(seccomp_mode)) + goto out; + + ret = seccomp_attach_user_filter(filter); + if (ret) goto out; - } seccomp_assign_mode(seccomp_mode); out: return ret; } +#else +static inline long seccomp_set_mode_filter(char __user *filter) +{ + return -EINVAL; +} +#endif /** * prctl_set_seccomp: configures current->seccomp.mode @@ -551,5 +569,12 @@ out: */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { - return seccomp_set_mode(seccomp_mode, filter); + switch (seccomp_mode) { + case SECCOMP_MODE_STRICT: + return seccomp_set_mode_strict(); + case SECCOMP_MODE_FILTER: + return seccomp_set_mode_filter(filter); + default: + return -EINVAL; + } } From a75a29b16e1f3eb2afcb9b2567edfc4cd8899635 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Jun 2014 16:08:24 -0700 Subject: [PATCH 44/82] seccomp: add "seccomp" syscall This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). In addition to the TSYNC flag later in this patch series, there is a non-zero chance that this syscall could be used for configuring a fixed argument area for seccomp-tracer-aware processes to pass syscall arguments in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter" for this syscall. Additionally, this syscall uses operation, flags, and user pointer for arguments because strictly passing arguments via a user pointer would mean seccomp itself would be unable to trivially filter the seccomp syscall itself. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: arch/x86/syscalls/syscall_32.tbl arch/x86/syscalls/syscall_64.tbl include/uapi/asm-generic/unistd.h kernel/seccomp.c Change-Id: Id7a365079829fd9164315dec75d6ee415c29b176 --- arch/Kconfig | 1 + arch/x86/syscalls/syscall_32.tbl | 5 +++ arch/x86/syscalls/syscall_64.tbl | 5 +++ include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 12 ++++++- include/uapi/linux/seccomp.h | 4 +++ kernel/seccomp.c | 55 ++++++++++++++++++++++++++++--- kernel/sys_ni.c | 3 ++ 8 files changed, 81 insertions(+), 6 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a4429bcd609e..84c94a89e75b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -331,6 +331,7 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing is called from a ptrace_event()-safe context - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. + - seccomp syscall wired up config SECCOMP_FILTER def_bool y diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1c..de6d048d0305 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -357,3 +357,8 @@ 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp 350 i386 finit_module sys_finit_module +# Backporting seccomp, skip a few ... +# 351 i386 sched_setattr sys_sched_setattr +# 352 i386 sched_getattr sys_sched_getattr +# 353 i386 renameat2 sys_renameat2 +354 i386 seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65dfd14f..a40bd6eda554 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -320,6 +320,11 @@ 311 64 process_vm_writev sys_process_vm_writev 312 common kcmp sys_kcmp 313 common finit_module sys_finit_module +# Backporting seccomp, skip a few ... +# 314 common sched_setattr sys_sched_setattr +# 315 common sched_getattr sys_sched_getattr +# 316 common renameat2 sys_renameat2 +317 common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..2a955dcc863c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -841,4 +841,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 0cc74c4403e4..b422ad5d238b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,9 +692,19 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr 274 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) + * #define __NR_sched_getattr 275 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) + * #define __NR_renameat2 276 +__SYSCALL(__NR_renameat2, sys_renameat2) + */ +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 274 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 793bec19b6c7..47dc5c59621e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include /* #define SECCOMP_DEBUG 1 */ @@ -325,7 +326,7 @@ fail: * * Returns 0 on success and non-zero otherwise. */ -long seccomp_attach_user_filter(char __user *user_filter) +static long seccomp_attach_user_filter(const char __user *user_filter) { struct sock_fprog fprog; long ret = -EFAULT; @@ -527,6 +528,7 @@ out: #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter: internal function for setting seccomp filter + * @flags: flags to change filter behavior * @filter: struct sock_fprog containing filter * * This function may be called repeatedly to install additional filters. @@ -537,11 +539,16 @@ out: * * Returns 0 on success or -EINVAL on failure. */ -static long seccomp_set_mode_filter(char __user *filter) +static long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; long ret = -EINVAL; + /* Validate flags. */ + if (flags != 0) + goto out; + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -554,12 +561,35 @@ out: return ret; } #else -static inline long seccomp_set_mode_filter(char __user *filter) +static inline long seccomp_set_mode_filter(unsigned int flags, + const char __user *filter) { return -EINVAL; } #endif +/* Common entry point for both prctl and syscall. */ +static long do_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs) +{ + switch (op) { + case SECCOMP_SET_MODE_STRICT: + if (flags != 0 || uargs != NULL) + return -EINVAL; + return seccomp_set_mode_strict(); + case SECCOMP_SET_MODE_FILTER: + return seccomp_set_mode_filter(flags, uargs); + default: + return -EINVAL; + } +} + +SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags, + const char __user *, uargs) +{ + return do_seccomp(op, flags, uargs); +} + /** * prctl_set_seccomp: configures current->seccomp.mode * @seccomp_mode: requested mode to use @@ -569,12 +599,27 @@ static inline long seccomp_set_mode_filter(char __user *filter) */ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) { + unsigned int op; + char __user *uargs; + switch (seccomp_mode) { case SECCOMP_MODE_STRICT: - return seccomp_set_mode_strict(); + op = SECCOMP_SET_MODE_STRICT; + /* + * Setting strict mode through prctl always ignored filter, + * so make sure it is always NULL here to pass the internal + * check in do_seccomp(). + */ + uargs = NULL; + break; case SECCOMP_MODE_FILTER: - return seccomp_set_mode_filter(filter); + op = SECCOMP_SET_MODE_FILTER; + uargs = filter; + break; default: return -EINVAL; } + + /* prctl interface doesn't have flags, so they are always zero. */ + return do_seccomp(op, 0, uargs); } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 7078052284fd..7e7fc0a082c4 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -209,3 +209,6 @@ cond_syscall(compat_sys_open_by_handle_at); /* compare kernel pointers */ cond_syscall(sys_kcmp); + +/* operate on Secure Computing state */ +cond_syscall(sys_seccomp); From 83f1ccba87b06575966b65352db565c363af7bcf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Jun 2014 15:40:23 -0700 Subject: [PATCH 45/82] ARM: add seccomp syscall Wires up the new seccomp syscall. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Conflicts: arch/arm/include/uapi/asm/unistd.h arch/arm/kernel/calls.S Change-Id: Ia519993681f70bd38699a73d8897ae9b44b3f0af --- arch/arm/include/uapi/asm/unistd.h | 6 ++++++ arch/arm/kernel/calls.S | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index af33b44990ed..bbe80a7cba0c 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -406,6 +406,12 @@ #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) #define __NR_kcmp (__NR_SYSCALL_BASE+378) #define __NR_finit_module (__NR_SYSCALL_BASE+379) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_SYSCALL_BASE+380) + * #define __NR_sched_getattr (__NR_SYSCALL_BASE+381) + * #define __NR_renameat2 (__NR_SYSCALL_BASE+382) + */ +#define __NR_seccomp (__NR_SYSCALL_BASE+383) /* * This may need to be greater than __NR_last_syscall+1 in order to diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index c6ca7e376773..1a2e529a1340 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -389,6 +389,10 @@ CALL(sys_process_vm_writev) CALL(sys_kcmp) CALL(sys_finit_module) +/* 380 */ CALL(sys_ni_syscall) /* CALL(sys_sched_setattr) */ + CALL(sys_ni_syscall) /* CALL(sys_sched_getattr) */ + CALL(sys_ni_syscall) /* CALL(sys_renameat2) */ + CALL(sys_seccomp) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted From 2c6d7de579c2cfebbc9378e5209c641a93839f0a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Jun 2014 15:45:09 -0700 Subject: [PATCH 46/82] MIPS: add seccomp syscall Wires up the new seccomp syscall. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Conflicts: arch/mips/include/uapi/asm/unistd.h arch/mips/kernel/scall32-o32.S arch/mips/kernel/scall64-64.S arch/mips/kernel/scall64-n32.S arch/mips/kernel/scall64-o32.S Change-Id: I7031bdbec7c90292aeb7e255c73cb36e6ec43af2 --- arch/mips/include/uapi/asm/unistd.h | 30 +++++++++++++++++++++++------ arch/mips/kernel/scall32-o32.S | 6 ++++++ arch/mips/kernel/scall64-64.S | 4 ++++ arch/mips/kernel/scall64-n32.S | 4 ++++ arch/mips/kernel/scall64-o32.S | 6 +++++- 5 files changed, 43 insertions(+), 7 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 1dee279f9665..af4d5c0a2f02 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -369,16 +369,22 @@ #define __NR_process_vm_writev (__NR_Linux + 346) #define __NR_kcmp (__NR_Linux + 347) #define __NR_finit_module (__NR_Linux + 348) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 349) + * #define __NR_sched_getattr (__NR_Linux + 350) + * #define __NR_renameat2 (__NR_Linux + 351) + */ +#define __NR_seccomp (__NR_Linux + 352) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 348 +#define __NR_Linux_syscalls 352 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 348 +#define __NR_O32_Linux_syscalls 352 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -695,16 +701,22 @@ #define __NR_kcmp (__NR_Linux + 306) #define __NR_finit_module (__NR_Linux + 307) #define __NR_getdents64 (__NR_Linux + 308) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 309) + * #define __NR_sched_getattr (__NR_Linux + 310) + * #define __NR_renameat2 (__NR_Linux + 311) + */ +#define __NR_seccomp (__NR_Linux + 312) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 308 +#define __NR_Linux_syscalls 312 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 308 +#define __NR_64_Linux_syscalls 312 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1025,15 +1037,21 @@ #define __NR_process_vm_writev (__NR_Linux + 310) #define __NR_kcmp (__NR_Linux + 311) #define __NR_finit_module (__NR_Linux + 312) +/* Backporting seccomp, skip a few ... + * #define __NR_sched_setattr (__NR_Linux + 313) + * #define __NR_sched_getattr (__NR_Linux + 314) + * #define __NR_renameat2 (__NR_Linux + 315) + */ +#define __NR_seccomp (__NR_Linux + 316) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 312 +#define __NR_Linux_syscalls 316 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 312 +#define __NR_N32_Linux_syscalls 316 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 9b36424b03c5..bcb2184e8a47 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -593,6 +593,12 @@ einval: li v0, -ENOSYS sys sys_process_vm_writev 6 sys sys_kcmp 5 sys sys_finit_module 3 + /* Backporting seccomp, skip a few ... */ + sys sys_ni_syscall 0 /* sys_sched_setattr */ + sys sys_ni_syscall 0 /* sys_sched_getattr */ /* 4350 */ + sys sys_ni_syscall 0 /* sys_renameat2 */ + sys sys_seccomp 3 + .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 97a5909a61cf..285872f9d6d1 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -424,4 +424,8 @@ sys_call_table: PTR sys_kcmp PTR sys_finit_module PTR sys_getdents64 + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ /* 5310 */ + sys sys_ni_syscall /* sys_renameat2 */ + sys sys_seccomp .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index edcb6594e7b5..bdee1a1ed1c2 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -417,4 +417,8 @@ EXPORT(sysn32_call_table) PTR compat_sys_process_vm_writev /* 6310 */ PTR sys_kcmp PTR sys_finit_module + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ + sys sys_ni_syscall /* sys_renameat2 */ /* 6315 */ + sys sys_seccomp .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 74f485d3c0ef..a1f826a24578 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -541,4 +541,8 @@ sys_call_table: PTR compat_sys_process_vm_writev PTR sys_kcmp PTR sys_finit_module - .size sys_call_table,.-sys_call_table + sys sys_ni_syscall /* sys_sched_setattr */ + sys sys_ni_syscall /* sys_sched_getattr */ /* 4350 */ + sys sys_ni_syscall /* sys_renameat2 */ + sys sys_seccomp + .size sys32_call_table,.-sys32_call_table From 3497a88f5510c809ca10deac3030493eabba65d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:16:33 -0700 Subject: [PATCH 47/82] seccomp: split filter prep from check and apply In preparation for adding seccomp locking, move filter creation away from where it is checked and applied. This will allow for locking where no memory allocation is happening. The validation, filter attachment, and seccomp mode setting can all happen under the future locks. For extreme defensiveness, I've added a BUG_ON check for the calculated size of the buffer allocation in case BPF_MAXINSN ever changes, which shouldn't ever happen. The compiler should actually optimize out this check since the test above it makes it impossible. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/seccomp.c Change-Id: I8d89f80a5b4f2826d90474dcea441c41f0af6594 --- kernel/seccomp.c | 113 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 47dc5c59621e..5737aafa6c2f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* #define SECCOMP_DEBUG 1 */ @@ -27,7 +28,6 @@ #include #include #include -#include #include #include @@ -238,25 +238,23 @@ static inline void seccomp_assign_mode(unsigned long seccomp_mode) #ifdef CONFIG_SECCOMP_FILTER /** - * seccomp_attach_filter: Attaches a seccomp filter to current. + * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install * - * Returns 0 on success or an errno on failure. + * Returns filter on success or an ERR_PTR on failure. */ -static long seccomp_attach_filter(struct sock_fprog *fprog) +static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *filter; - unsigned long fp_size = fprog->len * sizeof(struct sock_filter); - unsigned long total_insns = fprog->len; + unsigned long fp_size; + struct sock_filter *fp; + int new_len; long ret; if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) - return -EINVAL; - - for (filter = current->seccomp.filter; filter; filter = filter->prev) - total_insns += filter->len + 4; /* include a 4 instr penalty */ - if (total_insns > MAX_INSNS_PER_PATH) - return -ENOMEM; + return ERR_PTR(-EINVAL); + BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter)); + fp_size = fprog->len * sizeof(struct sock_filter); /* * Installing a seccomp filter requires that the task have @@ -267,15 +265,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) if (!current->no_new_privs && security_capable_noaudit(current_cred(), current_user_ns(), CAP_SYS_ADMIN) != 0) - return -EACCES; + return ERR_PTR(-EACCES); - /* Allocate a new seccomp_filter */ - filter = kzalloc(sizeof(struct seccomp_filter) + fp_size, - GFP_KERNEL|__GFP_NOWARN); - if (!filter) - return -ENOMEM; - atomic_set(&filter->usage, 1); - filter->len = fprog->len; + fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN); + if (!fp) + return ERR_PTR(-ENOMEM); /* Copy the instructions from fprog. */ ret = -EFAULT; @@ -308,28 +302,28 @@ static long seccomp_attach_filter(struct sock_fprog *fprog) atomic_set(&filter->usage, 1); filter->len = new_len; - /* - * If there is an existing filter, make it the prev and don't drop its - * task reference. - */ - filter->prev = current->seccomp.filter; - current->seccomp.filter = filter; - return 0; -fail: + return filter; + +free_filter_prog: + kfree(filter->prog); +free_filter: kfree(filter); - return ret; +free_prog: + kfree(fp); + return ERR_PTR(ret); } /** - * seccomp_attach_user_filter - attaches a user-supplied sock_fprog + * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog * @user_filter: pointer to the user data containing a sock_fprog. * * Returns 0 on success and non-zero otherwise. */ -static long seccomp_attach_user_filter(const char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; - long ret = -EFAULT; + struct seccomp_filter *filter = ERR_PTR(-EFAULT); #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -342,9 +336,39 @@ static long seccomp_attach_user_filter(const char __user *user_filter) #endif if (copy_from_user(&fprog, user_filter, sizeof(fprog))) goto out; - ret = seccomp_attach_filter(&fprog); + filter = seccomp_prepare_filter(&fprog); out: - return ret; + return filter; +} + +/** + * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior + * @filter: seccomp filter to add to the current process + * + * Returns 0 on success, -ve on error. + */ +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) +{ + unsigned long total_insns; + struct seccomp_filter *walker; + + /* Validate resulting filter length. */ + total_insns = filter->prog->len; + for (walker = current->seccomp.filter; walker; walker = walker->prev) + total_insns += walker->prog->len + 4; /* 4 instr penalty */ + if (total_insns > MAX_INSNS_PER_PATH) + return -ENOMEM; + + /* + * If there is an existing filter, make it the prev and don't drop its + * task reference. + */ + filter->prev = current->seccomp.filter; + current->seccomp.filter = filter; + + return 0; } /* get_seccomp_filter - increments the reference count of the filter on @tsk */ @@ -357,6 +381,14 @@ void get_seccomp_filter(struct task_struct *tsk) atomic_inc(&orig->usage); } +static inline void seccomp_filter_free(struct seccomp_filter *filter) +{ + if (filter) { + sk_filter_free(filter->prog); + kfree(filter); + } +} + /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ void put_seccomp_filter(struct task_struct *tsk) { @@ -365,7 +397,7 @@ void put_seccomp_filter(struct task_struct *tsk) while (orig && atomic_dec_and_test(&orig->usage)) { struct seccomp_filter *freeme = orig; orig = orig->prev; - kfree(freeme); + seccomp_filter_free(freeme); } } @@ -543,21 +575,30 @@ static long seccomp_set_mode_filter(unsigned int flags, const char __user *filter) { const unsigned long seccomp_mode = SECCOMP_MODE_FILTER; + struct seccomp_filter *prepared = NULL; long ret = -EINVAL; /* Validate flags. */ if (flags != 0) goto out; + /* Prepare the new filter before holding any locks. */ + prepared = seccomp_prepare_user_filter(filter); + if (IS_ERR(prepared)) + return PTR_ERR(prepared); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; - ret = seccomp_attach_user_filter(filter); + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; + /* Do not free the successfully attached filter. */ + prepared = NULL; seccomp_assign_mode(seccomp_mode); out: + seccomp_filter_free(prepared); return ret; } #else From 743266ae255c44edeb50911c191bf8083ea683dc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:18:48 -0700 Subject: [PATCH 48/82] seccomp: introduce writer locking Normally, task_struct.seccomp.filter is only ever read or modified by the task that owns it (current). This property aids in fast access during system call filtering as read access is lockless. Updating the pointer from another task, however, opens up race conditions. To allow cross-thread filter pointer updates, writes to the seccomp fields are now protected by the sighand spinlock (which is shared by all threads in the thread group). Read access remains lockless because pointer updates themselves are atomic. However, writes (or cloning) often entail additional checking (like maximum instruction counts) which require locking to perform safely. In the case of cloning threads, the child is invisible to the system until it enters the task list. To make sure a child can't be cloned from a thread and left in a prior state, seccomp duplication is additionally moved under the sighand lock. Then parent and child are certain have the same seccomp state when they exit the lock. Based on patches by Will Drewry and David Drysdale. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/fork.c Change-Id: Ie01ece43b610867013f7d0e0a2a7be0b9077630f --- include/linux/seccomp.h | 6 ++--- kernel/fork.c | 49 ++++++++++++++++++++++++++++++++++++++++- kernel/seccomp.c | 16 +++++++++++++- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 6f19cfd1840e..9ab63a574d40 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -14,11 +14,11 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. - * @filter: The metadata and ruleset for determining what system calls - * are allowed for a task. + * @filter: must always point to a valid seccomp-filter or NULL as it is + * accessed without locking during system call entry. * * @filter must only be accessed from the context of current as there - * is no locking. + * is no read locking. */ struct seccomp { int mode; diff --git a/kernel/fork.c b/kernel/fork.c index 41671a5d637d..8a3e9a91130c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -327,6 +327,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto free_ti; tsk->stack = ti; +#ifdef CONFIG_SECCOMP + /* + * We must handle setting up seccomp filters once we're under + * the sighand lock in case orig has changed between now and + * then. Until then, filter must be NULL to avoid messing up + * the usage counts on the error path calling free_task. + */ + tsk->seccomp.filter = NULL; +#endif setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); @@ -1102,6 +1111,39 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } +static void copy_seccomp(struct task_struct *p) +{ +#ifdef CONFIG_SECCOMP + /* + * Must be called with sighand->lock held, which is common to + * all threads in the group. Holding cred_guard_mutex is not + * needed because this new task is not yet running and cannot + * be racing exec. + */ + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Ref-count the new filter user, and assign it. */ + get_seccomp_filter(current); + p->seccomp = current->seccomp; + + /* + * Explicitly enable no_new_privs here in case it got set + * between the task_struct being duplicated and holding the + * sighand lock. The seccomp state and nnp must be in sync. + */ + if (task_no_new_privs(current)) + task_set_no_new_privs(p); + + /* + * If the parent gained a seccomp mode after copying thread + * flags and between before we held the sighand lock, we have + * to manually enable the seccomp thread flag here. + */ + if (p->seccomp.mode != SECCOMP_MODE_DISABLED) + set_tsk_thread_flag(p, TIF_SECCOMP); +#endif +} + SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1205,7 +1247,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; ftrace_graph_init_task(p); - get_seccomp_filter(p); rt_mutex_init_task(p); @@ -1447,6 +1488,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_lock(¤t->sighand->siglock); + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5737aafa6c2f..616e109ccd2e 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -224,6 +224,8 @@ static u32 seccomp_run_filters(int syscall) static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) return false; @@ -232,6 +234,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) static inline void seccomp_assign_mode(unsigned long seccomp_mode) { + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + current->seccomp.mode = seccomp_mode; set_tsk_thread_flag(current, TIF_SECCOMP); } @@ -346,6 +350,8 @@ out: * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * + * Caller must be holding current->sighand->siglock lock. + * * Returns 0 on success, -ve on error. */ static long seccomp_attach_filter(unsigned int flags, @@ -354,6 +360,8 @@ static long seccomp_attach_filter(unsigned int flags, unsigned long total_insns; struct seccomp_filter *walker; + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + /* Validate resulting filter length. */ total_insns = filter->prog->len; for (walker = current->seccomp.filter; walker; walker = walker->prev) @@ -543,6 +551,8 @@ static long seccomp_set_mode_strict(void) const unsigned long seccomp_mode = SECCOMP_MODE_STRICT; long ret = -EINVAL; + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -553,6 +563,7 @@ static long seccomp_set_mode_strict(void) ret = 0; out: + spin_unlock_irq(¤t->sighand->siglock); return ret; } @@ -580,13 +591,15 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Validate flags. */ if (flags != 0) - goto out; + return -EINVAL; /* Prepare the new filter before holding any locks. */ prepared = seccomp_prepare_user_filter(filter); if (IS_ERR(prepared)) return PTR_ERR(prepared); + spin_lock_irq(¤t->sighand->siglock); + if (!seccomp_may_assign_mode(seccomp_mode)) goto out; @@ -598,6 +611,7 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(seccomp_mode); out: + spin_unlock_irq(¤t->sighand->siglock); seccomp_filter_free(prepared); return ret; } From d92472791131c6809f21142a8bf75c72cff6ee19 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jun 2014 15:01:35 -0700 Subject: [PATCH 49/82] seccomp: allow mode setting across threads This changes the mode setting helper to allow threads to change the seccomp mode from another thread. We must maintain barriers to keep TIF_SECCOMP synchronized with the rest of the seccomp state. Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Conflicts: kernel/seccomp.c Change-Id: I091ffa55d8f4e83ff02558a55e2b4dc76ac26905 --- kernel/seccomp.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 616e109ccd2e..1d25f3cdc1be 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -202,19 +202,26 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) */ static u32 seccomp_run_filters(int syscall) { - struct seccomp_filter *f; + struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); + struct seccomp_data sd; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ - if (WARN_ON(current->seccomp.filter == NULL)) + if (unlikely(WARN_ON(f == NULL))) return SECCOMP_RET_KILL; + /* Make sure cross-thread synced filter points somewhere sane. */ + smp_read_barrier_depends(); + + populate_seccomp_data(&sd); + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ - for (f = current->seccomp.filter; f; f = f->prev) { - u32 cur_ret = sk_run_filter(NULL, f->insns); + for (; f; f = f->prev) { + u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd); + if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; } @@ -232,12 +239,18 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) return true; } -static inline void seccomp_assign_mode(unsigned long seccomp_mode) +static inline void seccomp_assign_mode(struct task_struct *task, + unsigned long seccomp_mode) { - BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + BUG_ON(!spin_is_locked(&task->sighand->siglock)); - current->seccomp.mode = seccomp_mode; - set_tsk_thread_flag(current, TIF_SECCOMP); + task->seccomp.mode = seccomp_mode; + /* + * Make sure TIF_SECCOMP cannot be set before the mode (and + * filter) is set. + */ + smp_mb__before_atomic(); + set_tsk_thread_flag(task, TIF_SECCOMP); } #ifdef CONFIG_SECCOMP_FILTER @@ -449,12 +462,17 @@ static int mode1_syscalls_32[] = { int __secure_computing(int this_syscall) { - int mode = current->seccomp.mode; int exit_sig = 0; int *syscall; u32 ret; - switch (mode) { + /* + * Make sure that any changes to mode from another thread have + * been seen after TIF_SECCOMP was seen. + */ + rmb(); + + switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: syscall = mode1_syscalls; #ifdef CONFIG_COMPAT @@ -559,7 +577,7 @@ static long seccomp_set_mode_strict(void) #ifdef TIF_NOTSC disable_TSC(); #endif - seccomp_assign_mode(seccomp_mode); + seccomp_assign_mode(current, seccomp_mode); ret = 0; out: @@ -609,7 +627,7 @@ static long seccomp_set_mode_filter(unsigned int flags, /* Do not free the successfully attached filter. */ prepared = NULL; - seccomp_assign_mode(seccomp_mode); + seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); seccomp_filter_free(prepared); From 81ff7fa232f4d404346ba6a22ac70f7e3385c632 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 Jun 2014 00:23:17 -0700 Subject: [PATCH 50/82] seccomp: implement SECCOMP_FILTER_FLAG_TSYNC Applying restrictive seccomp filter programs to large or diverse codebases often requires handling threads which may be started early in the process lifetime (e.g., by code that is linked in). While it is possible to apply permissive programs prior to process start up, it is difficult to further restrict the kernel ABI to those threads after that point. This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for synchronizing thread group seccomp filters at filter installation time. When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, filter) an attempt will be made to synchronize all threads in current's threadgroup to its new seccomp filter program. This is possible iff all threads are using a filter that is an ancestor to the filter current is attempting to synchronize to. NULL filters (where the task is running as SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS, ...) has been set on the calling thread, no_new_privs will be set for all synchronized threads too. On success, 0 is returned. On failure, the pid of one of the failing threads will be returned and no filters will have been applied. The race conditions against another thread are: - requesting TSYNC (already handled by sighand lock) - performing a clone (already handled by sighand lock) - changing its filter (already handled by sighand lock) - calling exec (handled by cred_guard_mutex) The clone case is assisted by the fact that new threads will have their seccomp state duplicated from their parent before appearing on the tasklist. Holding cred_guard_mutex means that seccomp filters cannot be assigned while in the middle of another thread's exec (potentially bypassing no_new_privs or similar). The call to de_thread() may kill threads waiting for the mutex. Changes across threads to the filter pointer includes a barrier. Based on patches by Will Drewry. Suggested-by: Julien Tinnes Signed-off-by: Kees Cook Reviewed-by: Oleg Nesterov Reviewed-by: Andy Lutomirski --- fs/exec.c | 2 +- include/linux/seccomp.h | 2 + include/uapi/linux/seccomp.h | 3 + kernel/seccomp.c | 135 ++++++++++++++++++++++++++++++++++- 4 files changed, 140 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index ffd7a813ad3d..b331086ace95 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1220,7 +1220,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against - * PTRACE_ATTACH + * PTRACE_ATTACH or seccomp thread-sync */ static int check_unsafe_exec(struct linux_binprm *bprm) { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 9ab63a574d40..9687691799ff 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,6 +3,8 @@ #include +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) + #ifdef CONFIG_SECCOMP #include diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index b258878ba754..0f238a43ff1e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -14,6 +14,9 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 +/* Valid flags for SECCOMP_SET_MODE_FILTER */ +#define SECCOMP_FILTER_FLAG_TSYNC 1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1d25f3cdc1be..2d13b264d850 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -26,6 +26,7 @@ #ifdef CONFIG_SECCOMP_FILTER #include #include +#include #include #include #include @@ -254,6 +255,114 @@ static inline void seccomp_assign_mode(struct task_struct *task, } #ifdef CONFIG_SECCOMP_FILTER +/* Returns 1 if the parent is an ancestor of the child. */ +static int is_ancestor(struct seccomp_filter *parent, + struct seccomp_filter *child) +{ + /* NULL is the root ancestor. */ + if (parent == NULL) + return 1; + for (; child; child = child->prev) + if (child == parent) + return 1; + return 0; +} + +/** + * seccomp_can_sync_threads: checks if all threads can be synchronized + * + * Expects sighand and cred_guard_mutex locks to be held. + * + * Returns 0 on success, -ve on error, or the pid of a thread which was + * either not in the correct seccomp mode or it did not have an ancestral + * seccomp filter. + */ +static inline pid_t seccomp_can_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Validate all threads being eligible for synchronization. */ + caller = current; + for_each_thread(caller, thread) { + pid_t failed; + + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter))) + continue; + + /* Return the first thread that cannot be synchronized. */ + failed = task_pid_vnr(thread); + /* If the pid cannot be resolved, then return -ESRCH */ + if (unlikely(WARN_ON(failed == 0))) + failed = -ESRCH; + return failed; + } + + return 0; +} + +/** + * seccomp_sync_threads: sets all threads to use current's filter + * + * Expects sighand and cred_guard_mutex locks to be held, and for + * seccomp_can_sync_threads() to have returned success already + * without dropping the locks. + * + */ +static inline void seccomp_sync_threads(void) +{ + struct task_struct *thread, *caller; + + BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); + BUG_ON(!spin_is_locked(¤t->sighand->siglock)); + + /* Synchronize all threads. */ + caller = current; + for_each_thread(caller, thread) { + /* Skip current, since it needs no changes. */ + if (thread == caller) + continue; + + /* Get a task reference for the new leaf node. */ + get_seccomp_filter(caller); + /* + * Drop the task reference to the shared ancestor since + * current's path will hold a reference. (This also + * allows a put before the assignment.) + */ + put_seccomp_filter(thread); + smp_store_release(&thread->seccomp.filter, + caller->seccomp.filter); + /* + * Opt the other thread into seccomp if needed. + * As threads are considered to be trust-realm + * equivalent (see ptrace_may_access), it is safe to + * allow one thread to transition the other. + */ + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + + seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); + } + } +} + /** * seccomp_prepare_filter: Prepares a seccomp filter for use. * @fprog: BPF program to install @@ -382,6 +491,15 @@ static long seccomp_attach_filter(unsigned int flags, if (total_insns > MAX_INSNS_PER_PATH) return -ENOMEM; + /* If thread sync has been requested, check that it is possible. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) { + int ret; + + ret = seccomp_can_sync_threads(); + if (ret) + return ret; + } + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -389,6 +507,10 @@ static long seccomp_attach_filter(unsigned int flags, filter->prev = current->seccomp.filter; current->seccomp.filter = filter; + /* Now that the new filter is in place, synchronize to all threads. */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + seccomp_sync_threads(); + return 0; } @@ -608,7 +730,7 @@ static long seccomp_set_mode_filter(unsigned int flags, long ret = -EINVAL; /* Validate flags. */ - if (flags != 0) + if (flags & ~SECCOMP_FILTER_FLAG_MASK) return -EINVAL; /* Prepare the new filter before holding any locks. */ @@ -616,6 +738,14 @@ static long seccomp_set_mode_filter(unsigned int flags, if (IS_ERR(prepared)) return PTR_ERR(prepared); + /* + * Make sure we cannot change seccomp or nnp state via TSYNC + * while another thread is in the middle of calling exec. + */ + if (flags & SECCOMP_FILTER_FLAG_TSYNC && + mutex_lock_killable(¤t->signal->cred_guard_mutex)) + goto out_free; + spin_lock_irq(¤t->sighand->siglock); if (!seccomp_may_assign_mode(seccomp_mode)) @@ -630,6 +760,9 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(current, seccomp_mode); out: spin_unlock_irq(¤t->sighand->siglock); + if (flags & SECCOMP_FILTER_FLAG_TSYNC) + mutex_unlock(¤t->signal->cred_guard_mutex); +out_free: seccomp_filter_free(prepared); return ret; } From f56b1aa3553e8e75178a8f61f13d6d1ab896143c Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 27 Aug 2014 20:30:29 -0700 Subject: [PATCH 51/82] arm: fixup NR_syscalls to accommodate the new seccomp syscall This belongs in commit: 83f1ccba87b06575966b65352db565c363af7bcf https://android-review.googlesource.com/#/c/104520 Change-Id: Id5037cbebac9b86c863da79c3b8729e627e65f8e Signed-off-by: JP Abgrall --- arch/arm/include/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 141baa3f9a72..acabef1a75df 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,7 @@ #include -#define __NR_syscalls (380) +#define __NR_syscalls (384) #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 From 42db8d1ff86afb45a5f1d4930e43042bd3c448bc Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 27 Aug 2014 19:07:30 -0700 Subject: [PATCH 52/82] arm64/crypto: SHA-1 using ARMv8 Crypto Extensions This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs that have support for the SHA-1 part of the ARM v8 Crypto Extensions. Change-Id: I29fafd308e17aff6e0d59938c106fae6ad7fe78e Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu Conflicts: arch/arm64/Makefile --- arch/arm64/Kconfig | 3 + arch/arm64/Makefile | 1 + arch/arm64/crypto/Kconfig | 16 +++ arch/arm64/crypto/Makefile | 12 +++ arch/arm64/crypto/sha1-ce-core.S | 153 +++++++++++++++++++++++++++ arch/arm64/crypto/sha1-ce-glue.c | 174 +++++++++++++++++++++++++++++++ 6 files changed, 359 insertions(+) create mode 100644 arch/arm64/crypto/Kconfig create mode 100644 arch/arm64/crypto/Makefile create mode 100644 arch/arm64/crypto/sha1-ce-core.S create mode 100644 arch/arm64/crypto/sha1-ce-glue.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 02f4d1cfd7b3..8832ac02ffef 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -273,5 +273,8 @@ source "arch/arm64/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" +if CRYPTO +source "arch/arm64/crypto/Kconfig" +endif source "lib/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index d69354dbd789..fb8c8f0fbf4f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000 export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig new file mode 100644 index 000000000000..7956881b5986 --- /dev/null +++ b/arch/arm64/crypto/Kconfig @@ -0,0 +1,16 @@ + +menuconfig ARM64_CRYPTO + bool "ARM64 Accelerated Cryptographic Algorithms" + depends on ARM64 + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM64 specific CPU features or instructions. + +if ARM64_CRYPTO + +config CRYPTO_SHA1_ARM64_CE + tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile new file mode 100644 index 000000000000..0ed3caaec81b --- /dev/null +++ b/arch/arm64/crypto/Makefile @@ -0,0 +1,12 @@ +# +# linux/arch/arm64/crypto/Makefile +# +# Copyright (C) 2014 Linaro Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o +sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..09d57d98609c --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -0,0 +1,153 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + k0 .req v0 + k1 .req v1 + k2 .req v2 + k3 .req v3 + + t0 .req v4 + t1 .req v5 + + dga .req q6 + dgav .req v6 + dgb .req s7 + dgbv .req v7 + + dg0q .req q12 + dg0s .req s12 + dg0v .req v12 + dg1s .req s13 + dg1v .req v13 + dg2s .req s14 + + .macro add_only, op, ev, rc, s0, dg1 + .ifc \ev, ev + add t1.4s, v\s0\().4s, \rc\().4s + sha1h dg2s, dg0s + .ifnb \dg1 + sha1\op dg0q, \dg1, t0.4s + .else + sha1\op dg0q, dg1s, t0.4s + .endif + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha1h dg1s, dg0s + sha1\op dg0q, dg2s, t1.4s + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s + add_only \op, \ev, \rc, \s1, \dg1 + sha1su1 v\s0\().4s, v\s3\().4s + .endm + + /* + * The SHA1 round constants + */ + .align 4 +.Lsha1_rcon: + .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 + + /* + * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr x6, .Lsha1_rcon + ld1r {k0.4s}, [x6], #4 + ld1r {k1.4s}, [x6], #4 + ld1r {k2.4s}, [x6], #4 + ld1r {k3.4s}, [x6] + + /* load state */ + ldr dga, [x2] + ldr dgb, [x2, #16] + + /* load partial state (if supplied) */ + cbz x3, 0f + ld1 {v8.4s-v11.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v8.4s-v11.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v8.16b, v8.16b ) +CPU_LE( rev32 v9.16b, v9.16b ) +CPU_LE( rev32 v10.16b, v10.16b ) +CPU_LE( rev32 v11.16b, v11.16b ) + +2: add t0.4s, v8.4s, k0.4s + mov dg0v.16b, dgav.16b + + add_update c, ev, k0, 8, 9, 10, 11, dgb + add_update c, od, k0, 9, 10, 11, 8 + add_update c, ev, k0, 10, 11, 8, 9 + add_update c, od, k0, 11, 8, 9, 10 + add_update c, ev, k1, 8, 9, 10, 11 + + add_update p, od, k1, 9, 10, 11, 8 + add_update p, ev, k1, 10, 11, 8, 9 + add_update p, od, k1, 11, 8, 9, 10 + add_update p, ev, k1, 8, 9, 10, 11 + add_update p, od, k2, 9, 10, 11, 8 + + add_update m, ev, k2, 10, 11, 8, 9 + add_update m, od, k2, 11, 8, 9, 10 + add_update m, ev, k2, 8, 9, 10, 11 + add_update m, od, k2, 9, 10, 11, 8 + add_update m, ev, k3, 10, 11, 8, 9 + + add_update p, od, k3, 11, 8, 9, 10 + add_only p, ev, k3, 9 + add_only p, od, k3, 10 + add_only p, ev, k3, 11 + add_only p, od + + /* update state */ + add dgbv.2s, dgbv.2s, dg1v.2s + add dgav.4s, dgav.4s, dg0v.4s + + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v9.2d, #0 + mov x8, #0x80000000 + movi v10.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d8, x8 + mov x4, #0 + mov v11.d[0], xzr + mov v11.d[1], x7 + b 2b + + /* store new state */ +3: str dga, [x2] + str dgb, [x2, #16] + ret +ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..6fe83f37a750 --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -0,0 +1,174 @@ +/* + * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha1_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; +} + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, + partial ? sctx->buffer : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); + return 0; +} + +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + __be32 *dst = (__be32 *)out; + int i; + + u32 padlen = SHA1_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); + + sha1_update(desc, padding, padlen); + sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int blocks; + int i; + + if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { + sha1_update(desc, data, len); + return sha1_final(desc, out); + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha1_ce_transform() + */ + blocks = len / SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg alg = { + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .finup = sha1_finup, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct sha1_state), + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_ce_mod_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit sha1_ce_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(SHA1, sha1_ce_mod_init); +module_exit(sha1_ce_mod_fini); From 77278cdb9d7f93f00c0b43b544daf73b755e2dcd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Mar 2014 15:35:40 +0100 Subject: [PATCH 53/82] arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions This patch adds support for the SHA-224 and SHA-256 Secure Hash Algorithms for CPUs that have support for the SHA-2 part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 5 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/sha2-ce-core.S | 156 +++++++++++++++++++ arch/arm64/crypto/sha2-ce-glue.c | 255 +++++++++++++++++++++++++++++++ 4 files changed, 419 insertions(+) create mode 100644 arch/arm64/crypto/sha2-ce-core.S create mode 100644 arch/arm64/crypto/sha2-ce-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 7956881b5986..eb1e99770c21 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -13,4 +13,9 @@ config CRYPTO_SHA1_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_SHA2_ARM64_CE + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0ed3caaec81b..0b3885a60d43 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -10,3 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o + +obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o +sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..7f29fc031ea8 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -0,0 +1,156 @@ +/* + * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .text + .arch armv8-a+crypto + + dga .req q20 + dgav .req v20 + dgb .req q21 + dgbv .req v21 + + t0 .req v22 + t1 .req v23 + + dg0q .req q24 + dg0v .req v24 + dg1q .req q25 + dg1v .req v25 + dg2q .req q26 + dg2v .req v26 + + .macro add_only, ev, rc, s0 + mov dg2v.16b, dg0v.16b + .ifeq \ev + add t1.4s, v\s0\().4s, \rc\().4s + sha256h dg0q, dg1q, t0.4s + sha256h2 dg1q, dg2q, t0.4s + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha256h dg0q, dg1q, t1.4s + sha256h2 dg1q, dg2q, t1.4s + .endif + .endm + + .macro add_update, ev, rc, s0, s1, s2, s3 + sha256su0 v\s0\().4s, v\s1\().4s + add_only \ev, \rc, \s1 + sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s + .endm + + /* + * The SHA-256 round constants + */ + .align 4 +.Lsha2_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + /* + * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha2_ce_transform) + /* load round constants */ + adr x8, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [x8], #64 + ld1 { v4.4s- v7.4s}, [x8], #64 + ld1 { v8.4s-v11.4s}, [x8], #64 + ld1 {v12.4s-v15.4s}, [x8] + + /* load state */ + ldp dga, dgb, [x2] + + /* load partial input (if supplied) */ + cbz x3, 0f + ld1 {v16.4s-v19.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v16.4s-v19.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v16.16b, v16.16b ) +CPU_LE( rev32 v17.16b, v17.16b ) +CPU_LE( rev32 v18.16b, v18.16b ) +CPU_LE( rev32 v19.16b, v19.16b ) + +2: add t0.4s, v16.4s, v0.4s + mov dg0v.16b, dgav.16b + mov dg1v.16b, dgbv.16b + + add_update 0, v1, 16, 17, 18, 19 + add_update 1, v2, 17, 18, 19, 16 + add_update 0, v3, 18, 19, 16, 17 + add_update 1, v4, 19, 16, 17, 18 + + add_update 0, v5, 16, 17, 18, 19 + add_update 1, v6, 17, 18, 19, 16 + add_update 0, v7, 18, 19, 16, 17 + add_update 1, v8, 19, 16, 17, 18 + + add_update 0, v9, 16, 17, 18, 19 + add_update 1, v10, 17, 18, 19, 16 + add_update 0, v11, 18, 19, 16, 17 + add_update 1, v12, 19, 16, 17, 18 + + add_only 0, v13, 17 + add_only 1, v14, 18 + add_only 0, v15, 19 + add_only 1 + + /* update state */ + add dgav.4s, dgav.4s, dg0v.4s + add dgbv.4s, dgbv.4s, dg1v.4s + + /* handled all input blocks? */ + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v17.2d, #0 + mov x8, #0x80000000 + movi v18.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d16, x8 + mov x4, #0 + mov v19.d[0], xzr + mov v19.d[1], x7 + b 2b + + /* store new state */ +3: stp dga, dgb, [x2] + ret +ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..c294e67d3925 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -0,0 +1,255 @@ +/* + * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} + +static int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} + +static int sha2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx->buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, + partial ? sctx->buf : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buf + partial, data, len); + return 0; +} + +static void sha2_final(struct shash_desc *desc) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + u32 padlen = SHA256_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); + + sha2_update(desc, padding, padlen); + sha2_update(desc, (const u8 *)&bits, sizeof(bits)); +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static void sha2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + int blocks; + + if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { + sha2_update(desc, data, len); + sha2_final(desc); + return; + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha2_ce_transform() + */ + blocks = len / SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + data += blocks * SHA256_BLOCK_SIZE; +} + +static int sha224_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha2_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha2_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg algs[] = { { + .init = sha224_init, + .update = sha2_update, + .final = sha224_final, + .finup = sha224_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha256_init, + .update = sha2_update, + .final = sha256_final, + .finup = sha256_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha2_ce_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha2_ce_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_cpu_feature_match(SHA2, sha2_ce_mod_init); +module_exit(sha2_ce_mod_fini); From 0224264fee47605f30771a988448375a0f84b5e9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 26 Mar 2014 20:53:05 +0100 Subject: [PATCH 54/82] arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions This is a port to ARMv8 (Crypto Extensions) of the Intel implementation of the GHASH Secure Hash (used in the Galois/Counter chaining mode). It relies on the optional PMULL/PMULL2 instruction (polynomial multiply long, what Intel call carry-less multiply). Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 6 ++ arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/ghash-ce-core.S | 95 ++++++++++++++++++ arch/arm64/crypto/ghash-ce-glue.c | 155 ++++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 arch/arm64/crypto/ghash-ce-core.S create mode 100644 arch/arm64/crypto/ghash-ce-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index eb1e99770c21..0c50859ee7b9 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,4 +18,10 @@ config CRYPTO_SHA2_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH + +config CRYPTO_GHASH_ARM64_CE + tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 0b3885a60d43..e8c81a068868 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -13,3 +13,6 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o + +obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o +ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..b9e6eaf41c9b --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -0,0 +1,95 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include + + DATA .req v0 + SHASH .req v1 + IN1 .req v2 + T1 .req v2 + T2 .req v3 + T3 .req v4 + VZR .req v5 + + .text + .arch armv8-a+crypto + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update) + ld1 {DATA.16b}, [x1] + ld1 {SHASH.16b}, [x3] + eor VZR.16b, VZR.16b, VZR.16b + + /* do the head block first, if supplied */ + cbz x4, 0f + ld1 {IN1.2d}, [x4] + b 1f + +0: ld1 {IN1.2d}, [x2], #16 + sub w0, w0, #1 +1: ext IN1.16b, IN1.16b, IN1.16b, #8 +CPU_LE( rev64 IN1.16b, IN1.16b ) + eor DATA.16b, DATA.16b, IN1.16b + + /* multiply DATA by SHASH in GF(2^128) */ + ext T2.16b, DATA.16b, DATA.16b, #8 + ext T3.16b, SHASH.16b, SHASH.16b, #8 + eor T2.16b, T2.16b, DATA.16b + eor T3.16b, T3.16b, SHASH.16b + + pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 + pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 + pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) + eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) + eor T2.16b, T2.16b, DATA.16b + + ext T3.16b, VZR.16b, T2.16b, #8 + ext T2.16b, T2.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T3.16b + eor T1.16b, T1.16b, T2.16b // is result of + // carry-less multiplication + + /* first phase of the reduction */ + shl T3.2d, DATA.2d, #1 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #5 + eor T3.16b, T3.16b, DATA.16b + shl T3.2d, T3.2d, #57 + ext T2.16b, VZR.16b, T3.16b, #8 + ext T3.16b, T3.16b, VZR.16b, #8 + eor DATA.16b, DATA.16b, T2.16b + eor T1.16b, T1.16b, T3.16b + + /* second phase of the reduction */ + ushr T2.2d, DATA.2d, #5 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T2.16b, T2.16b, DATA.16b + ushr T2.2d, T2.2d, #1 + eor T1.16b, T1.16b, T2.16b + eor DATA.16b, DATA.16b, T1.16b + + cbnz w0, 0b + + st1 {DATA.16b}, [x1] + ret +ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..b92baf3f68c7 --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -0,0 +1,155 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_key { + u64 a; + u64 b; +}; + +struct ghash_desc_ctx { + u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; + u8 buf[GHASH_BLOCK_SIZE]; + u32 count; +}; + +asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, const char *head); + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + ctx->count += len; + + if ((partial + len) >= GHASH_BLOCK_SIZE) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; + + if (partial) { + int p = GHASH_BLOCK_SIZE - partial; + + memcpy(ctx->buf + partial, src, p); + src += p; + len -= p; + } + + blocks = len / GHASH_BLOCK_SIZE; + len %= GHASH_BLOCK_SIZE; + + kernel_neon_begin_partial(6); + pmull_ghash_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); + kernel_neon_end(); + src += blocks * GHASH_BLOCK_SIZE; + } + if (len) + memcpy(ctx->buf + partial, src, len); + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + if (partial) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + + memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + + kernel_neon_begin_partial(6); + pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); + kernel_neon_end(); + } + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* perform multiplication by 'x' in GF(2^128) */ + b = get_unaligned_be64(inkey); + a = get_unaligned_be64(inkey + 8); + + key->a = (a << 1) | (b >> 63); + key->b = (b << 1) | (a >> 63); + + if (b >> 63) + key->b ^= 0xc200000000000000UL; + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_key), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_ce_mod_init(void) +{ + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_ce_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_cpu_feature_match(PMULL, ghash_ce_mod_init); +module_exit(ghash_ce_mod_exit); From b42ec6c2c7a7ea0ccfcdb580eeb3806311b3479b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 5 Feb 2014 18:13:38 +0100 Subject: [PATCH 55/82] arm64/crypto: AES using ARMv8 Crypto Extensions This patch adds support for the AES symmetric encryption algorithm for CPUs that have support for the AES part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 7 +- arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-ce-cipher.c | 155 ++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/crypto/aes-ce-cipher.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 0c50859ee7b9..9ba32c0da871 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -18,10 +18,15 @@ config CRYPTO_SHA2_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH - config CRYPTO_GHASH_ARM64_CE tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_AES_ARM64_CE + tristate "AES core cipher using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index e8c81a068868..908abd9242b1 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -16,3 +16,6 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o +CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c new file mode 100644 index 000000000000..2075e1acae6b --- /dev/null +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -0,0 +1,155 @@ +/* + * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aese v0.16b, v2.16b ;" + " aesmc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aese v0.16b, v3.16b ;" + " aesmc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aese v0.16b, v1.16b ;" + " aesmc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aese v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_enc), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aesd v0.16b, v2.16b ;" + " aesimc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aesd v0.16b, v3.16b ;" + " aesimc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aesd v0.16b, v1.16b ;" + " aesimc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aesd v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_dec), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = crypto_aes_set_key, + .cia_encrypt = aes_cipher_encrypt, + .cia_decrypt = aes_cipher_decrypt + } +}; + +static int __init aes_mod_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_cpu_feature_match(AES, aes_mod_init); +module_exit(aes_mod_exit); From d8cb4d92e89d59ccfddb38e0e07b3f70efae6754 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 10 Feb 2014 11:26:29 +0100 Subject: [PATCH 56/82] arm64/crypto: AES in CCM mode using ARMv8 Crypto Extensions This patch adds support for the AES-CCM encryption algorithm for CPUs that have support for the AES part of the ARM v8 Crypto Extensions. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 7 + arch/arm64/crypto/Makefile | 3 + arch/arm64/crypto/aes-ce-ccm-core.S | 222 +++++++++++++++++++++ arch/arm64/crypto/aes-ce-ccm-glue.c | 297 ++++++++++++++++++++++++++++ 4 files changed, 529 insertions(+) create mode 100644 arch/arm64/crypto/aes-ce-ccm-core.S create mode 100644 arch/arm64/crypto/aes-ce-ccm-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 9ba32c0da871..8fffd5af65ef 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -29,4 +29,11 @@ config CRYPTO_AES_ARM64_CE select CRYPTO_ALGAPI select CRYPTO_AES +config CRYPTO_AES_ARM64_CE_CCM + tristate "AES in CCM mode using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + select CRYPTO_AEAD + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 908abd9242b1..311287d68078 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -19,3 +19,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o +aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S new file mode 100644 index 000000000000..432e4841cd81 --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -0,0 +1,222 @@ +/* + * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + + .text + .arch armv8-a+crypto + + /* + * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + * u32 *macp, u8 const rk[], u32 rounds); + */ +ENTRY(ce_aes_ccm_auth_data) + ldr w8, [x3] /* leftover from prev round? */ + ld1 {v0.2d}, [x0] /* load mac */ + cbz w8, 1f + sub w8, w8, #16 + eor v1.16b, v1.16b, v1.16b +0: ldrb w7, [x1], #1 /* get 1 byte of input */ + subs w2, w2, #1 + add w8, w8, #1 + ins v1.b[0], w7 + ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ + beq 8f /* out of input? */ + cbnz w8, 0b + eor v0.16b, v0.16b, v1.16b +1: ld1 {v3.2d}, [x4] /* load first round key */ + prfm pldl1strm, [x1] + cmp w5, #12 /* which key size? */ + add x6, x4, #16 + sub w7, w5, #2 /* modified # of rounds */ + bmi 2f + bne 5f + mov v5.16b, v3.16b + b 4f +2: mov v4.16b, v3.16b + ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ +3: aese v0.16b, v4.16b + aesmc v0.16b, v0.16b +4: ld1 {v3.2d}, [x6], #16 /* load next round key */ + aese v0.16b, v5.16b + aesmc v0.16b, v0.16b +5: ld1 {v4.2d}, [x6], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aesmc v0.16b, v0.16b + ld1 {v5.2d}, [x6], #16 /* load next round key */ + bpl 3b + aese v0.16b, v4.16b + subs w2, w2, #16 /* last data? */ + eor v0.16b, v0.16b, v5.16b /* final round */ + bmi 6f + ld1 {v1.16b}, [x1], #16 /* load next input block */ + eor v0.16b, v0.16b, v1.16b /* xor with mac */ + bne 1b +6: st1 {v0.2d}, [x0] /* store mac */ + beq 10f + adds w2, w2, #16 + beq 10f + mov w8, w2 +7: ldrb w7, [x1], #1 + umov w6, v0.b[0] + eor w6, w6, w7 + strb w6, [x0], #1 + subs w2, w2, #1 + beq 10f + ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ + b 7b +8: mov w7, w8 + add w8, w8, #16 +9: ext v1.16b, v1.16b, v1.16b, #1 + adds w7, w7, #1 + bne 9b + eor v0.16b, v0.16b, v1.16b + st1 {v0.2d}, [x0] +10: str w8, [x3] + ret +ENDPROC(ce_aes_ccm_auth_data) + + /* + * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], + * u32 rounds); + */ +ENTRY(ce_aes_ccm_final) + ld1 {v3.2d}, [x2], #16 /* load first round key */ + ld1 {v0.2d}, [x0] /* load mac */ + cmp w3, #12 /* which key size? */ + sub w3, w3, #2 /* modified # of rounds */ + ld1 {v1.2d}, [x1] /* load 1st ctriv */ + bmi 0f + bne 3f + mov v5.16b, v3.16b + b 2f +0: mov v4.16b, v3.16b +1: ld1 {v5.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +2: ld1 {v3.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v4.2d}, [x2], #16 /* load next round key */ + subs w3, w3, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + bpl 1b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + /* final round key cancels out */ + eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ + st1 {v0.2d}, [x0] /* store result */ + ret +ENDPROC(ce_aes_ccm_final) + + .macro aes_ccm_do_crypt,enc + ldr x8, [x6, #8] /* load lower ctr */ + ld1 {v0.2d}, [x5] /* load mac */ + rev x8, x8 /* keep swabbed ctr in reg */ +0: /* outer loop */ + ld1 {v1.1d}, [x6] /* load upper ctr */ + prfm pldl1strm, [x1] + add x8, x8, #1 + rev x9, x8 + cmp w4, #12 /* which key size? */ + sub w7, w4, #2 /* get modified # of rounds */ + ins v1.d[1], x9 /* no carry in lower ctr */ + ld1 {v3.2d}, [x3] /* load first round key */ + add x10, x3, #16 + bmi 1f + bne 4f + mov v5.16b, v3.16b + b 3f +1: mov v4.16b, v3.16b + ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ +2: /* inner loop: 3 rounds, 2x interleaved */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v3.2d}, [x10], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +4: ld1 {v4.2d}, [x10], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + ld1 {v5.2d}, [x10], #16 /* load next round key */ + bpl 2b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + subs w2, w2, #16 + bmi 6f /* partial block? */ + ld1 {v2.16b}, [x1], #16 /* load next input block */ + .if \enc == 1 + eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ + eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ + .else + eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ + eor v1.16b, v2.16b, v5.16b /* final round enc */ + .endif + eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ + st1 {v1.16b}, [x0], #16 /* write output block */ + bne 0b + rev x8, x8 + st1 {v0.2d}, [x5] /* store mac */ + str x8, [x6, #8] /* store lsb end of ctr (BE) */ +5: ret + +6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + eor v1.16b, v1.16b, v5.16b /* final round enc */ + st1 {v0.2d}, [x5] /* store mac */ + add w2, w2, #16 /* process partial tail block */ +7: ldrb w9, [x1], #1 /* get 1 byte of input */ + umov w6, v1.b[0] /* get top crypted ctr byte */ + umov w7, v0.b[0] /* get top mac byte */ + .if \enc == 1 + eor w7, w7, w9 + eor w9, w9, w6 + .else + eor w9, w9, w6 + eor w7, w7, w9 + .endif + strb w9, [x0], #1 /* store out byte */ + strb w7, [x5], #1 /* store mac byte */ + subs w2, w2, #1 + beq 5b + ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ + ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ + b 7b + .endm + + /* + * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + */ +ENTRY(ce_aes_ccm_encrypt) + aes_ccm_do_crypt 1 +ENDPROC(ce_aes_ccm_encrypt) + +ENTRY(ce_aes_ccm_decrypt) + aes_ccm_do_crypt 0 +ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c new file mode 100644 index 000000000000..9e6cdde9b43d --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -0,0 +1,297 @@ +/* + * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + u32 *macp, u32 const rk[], u32 rounds); + +asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], + u32 rounds); + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8]; + u32 l = req->iv[0] + 1; + + /* verify that CCM dimension 'L' is set correctly in the IV */ + if (l < 2 || l > 8) + return -EINVAL; + + /* verify that msglen can in fact be represented in L bytes */ + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + /* + * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi + * uses a u32 type to represent msglen so the top 4 bytes are always 0. + */ + n[0] = 0; + n[1] = cpu_to_be32(msglen); + + memcpy(maciv, req->iv, AES_BLOCK_SIZE - l); + + /* + * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C) + * - bits 0..2 : max # of bytes required to represent msglen, minus 1 + * (already set by caller) + * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc) + * - bit 6 : indicates presence of authenticate-only data + */ + maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2; + if (req->assoclen) + maciv[0] |= 0x40; + + memset(&req->iv[AES_BLOCK_SIZE - l], 0, l); + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; u16 len; } ltag; + struct scatter_walk walk; + u32 len = req->assoclen; + u32 macp = 0; + + /* prepend the AAD with a length tag */ + if (len < 0xff00) { + ltag.l = cpu_to_be16(len); + ltag.len = 2; + } else { + ltag.l = cpu_to_be16(0xfffe); + put_unaligned_be32(len, <ag.h); + ltag.len = 6; + } + + ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, &macp, ctx->key_enc, + num_rounds(ctx)); + scatterwalk_start(&walk, req->assoc); + + do { + u32 n = scatterwalk_clamp(&walk, len); + u8 *p; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, len); + } + p = scatterwalk_map(&walk); + ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc, + num_rounds(ctx)); + len -= n; + + scatterwalk_unmap(p); + scatterwalk_advance(&walk, n); + scatterwalk_done(&walk, 0, len); + } while (len); +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + unsigned int authsize = crypto_aead_authsize(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen - authsize; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize, + authsize, 0); + + if (memcmp(mac, buf, authsize)) + return -EBADMSG; + return 0; +} + +static struct crypto_alg ccm_aes_alg = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_aead = { + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, + } +}; + +static int __init aes_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_AES)) + return -ENODEV; + return crypto_register_alg(&ccm_aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&ccm_aes_alg); +} + +module_init(aes_mod_init); +module_exit(aes_mod_exit); + +MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ccm(aes)"); From a3025a1a97c04833415b658283702e37648a7b7e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Mar 2014 10:19:17 +0100 Subject: [PATCH 57/82] arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions This adds ARMv8 implementations of AES in ECB, CBC, CTR and XTS modes, both for ARMv8 with Crypto Extensions and for plain ARMv8 NEON. The Crypto Extensions version can only run on ARMv8 implementations that have support for these optional extensions. The plain NEON version is a table based yet time invariant implementation. All S-box substitutions are performed in parallel, leveraging the wide range of ARMv8's tbl/tbx instructions, and the huge NEON register file, which can comfortably hold the entire S-box and still have room to spare for doing the actual computations. The key expansion routines were borrowed from aes_generic. Signed-off-by: Ard Biesheuvel Acked-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 14 + arch/arm64/crypto/Makefile | 14 + arch/arm64/crypto/aes-ce.S | 133 +++++++++ arch/arm64/crypto/aes-glue.c | 446 ++++++++++++++++++++++++++++ arch/arm64/crypto/aes-modes.S | 532 ++++++++++++++++++++++++++++++++++ arch/arm64/crypto/aes-neon.S | 382 ++++++++++++++++++++++++ 6 files changed, 1521 insertions(+) create mode 100644 arch/arm64/crypto/aes-ce.S create mode 100644 arch/arm64/crypto/aes-glue.c create mode 100644 arch/arm64/crypto/aes-modes.S create mode 100644 arch/arm64/crypto/aes-neon.S diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 8fffd5af65ef..5562652c5316 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -36,4 +36,18 @@ config CRYPTO_AES_ARM64_CE_CCM select CRYPTO_AES select CRYPTO_AEAD +config CRYPTO_AES_ARM64_CE_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +config CRYPTO_AES_ARM64_NEON_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 311287d68078..2070a56ecc46 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -22,3 +22,17 @@ CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o +aes-ce-blk-y := aes-glue-ce.o aes-ce.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o +aes-neon-blk-y := aes-glue-neon.o aes-neon.o + +AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-neon.o := -DINTERLEAVE=4 + +CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS + +$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE + $(call if_changed_dep,cc_o_c) diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S new file mode 100644 index 000000000000..685a18f731eb --- /dev/null +++ b/arch/arm64/crypto/aes-ce.S @@ -0,0 +1,133 @@ +/* + * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with + * Crypto Extensions + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(ce_ ## func) +#define AES_ENDPROC(func) ENDPROC(ce_ ## func) + + .arch armv8-a+crypto + + /* preload all round keys */ + .macro load_round_keys, rounds, rk + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + ld1 {v17.16b-v18.16b}, [\rk], #32 +1111: ld1 {v19.16b-v20.16b}, [\rk], #32 +2222: ld1 {v21.16b-v24.16b}, [\rk], #64 + ld1 {v25.16b-v28.16b}, [\rk], #64 + ld1 {v29.16b-v31.16b}, [\rk] + .endm + + /* prepare for encryption with key in rk[] */ + .macro enc_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for encryption (again) but with new key in rk[] */ + .macro enc_switch_key, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for decryption with key in rk[] */ + .macro dec_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + aes\mc \i0\().16b, \i0\().16b + .ifnb \i1 + aes\mc \i1\().16b, \i1\().16b + .ifnb \i3 + aes\mc \i2\().16b, \i2\().16b + aes\mc \i3\().16b, \i3\().16b + .endif + .endif + .endm + + /* up to 4 interleaved encryption rounds with the same round key */ + .macro round_Nx, enc, k, i0, i1, i2, i3 + .ifc \enc, e + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 + .else + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 + .endif + .endm + + /* up to 4 interleaved final rounds */ + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + eor \i0\().16b, \i0\().16b, \k2\().16b + .ifnb \i1 + eor \i1\().16b, \i1\().16b, \k2\().16b + .ifnb \i3 + eor \i2\().16b, \i2\().16b, \k2\().16b + eor \i3\().16b, \i3\().16b, \k2\().16b + .endif + .endif + .endm + + /* up to 4 interleaved blocks */ + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + round_Nx \enc, v17, \i0, \i1, \i2, \i3 + round_Nx \enc, v18, \i0, \i1, \i2, \i3 +1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 + round_Nx \enc, v20, \i0, \i1, \i2, \i3 +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + round_Nx \enc, \key, \i0, \i1, \i2, \i3 + .endr + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 + .endm + + .macro encrypt_block, in, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \in + .endm + + .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1 + .endm + + .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 + .endm + + .macro decrypt_block, in, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \in + .endm + + .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1 + .endm + + .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 + .endm + +#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c new file mode 100644 index 000000000000..60f2f4c12256 --- /dev/null +++ b/arch/arm64/crypto/aes-glue.c @@ -0,0 +1,446 @@ +/* + * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_V8_CRYPTO_EXTENSIONS +#define MODE "ce" +#define PRIO 300 +#define aes_ecb_encrypt ce_aes_ecb_encrypt +#define aes_ecb_decrypt ce_aes_ecb_decrypt +#define aes_cbc_encrypt ce_aes_cbc_encrypt +#define aes_cbc_decrypt ce_aes_cbc_decrypt +#define aes_ctr_encrypt ce_aes_ctr_encrypt +#define aes_xts_encrypt ce_aes_xts_encrypt +#define aes_xts_decrypt ce_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +#else +#define MODE "neon" +#define PRIO 200 +#define aes_ecb_encrypt neon_aes_ecb_encrypt +#define aes_ecb_decrypt neon_aes_ecb_decrypt +#define aes_cbc_encrypt neon_aes_cbc_encrypt +#define aes_cbc_decrypt neon_aes_cbc_decrypt +#define aes_ctr_encrypt neon_aes_ctr_encrypt +#define aes_xts_encrypt neon_aes_xts_encrypt +#define aes_xts_decrypt neon_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +MODULE_ALIAS("ecb(aes)"); +MODULE_ALIAS("cbc(aes)"); +MODULE_ALIAS("ctr(aes)"); +MODULE_ALIAS("xts(aes)"); +#endif + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-modes.S */ +asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); +asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); + +asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); +asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); + +asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], int first); + +asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); +asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + first = 1; + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + first = 0; + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + blocks, walk.iv, first); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-" MODE, + .cra_driver_name = "__driver-ecb-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-" MODE, + .cra_driver_name = "__driver-cbc-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-" MODE, + .cra_driver_name = "__driver-ctr-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-" MODE, + .cra_driver_name = "__driver-xts-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +#ifdef USE_V8_CRYPTO_EXTENSIONS +module_cpu_feature_match(AES, aes_init); +#else +module_init(aes_init); +#endif +module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S new file mode 100644 index 000000000000..f6e372c528eb --- /dev/null +++ b/arch/arm64/crypto/aes-modes.S @@ -0,0 +1,532 @@ +/* + * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* included by aes-ce.S and aes-neon.S */ + + .text + .align 4 + +/* + * There are several ways to instantiate this code: + * - no interleave, all inline + * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2) + * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE) + * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4) + * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE) + * + * Macros imported by this code: + * - enc_prepare - setup NEON registers for encryption + * - dec_prepare - setup NEON registers for decryption + * - enc_switch_key - change to new key after having prepared for encryption + * - encrypt_block - encrypt a single block + * - decrypt block - decrypt a single block + * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4) + * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4) + */ + +#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE) +#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp +#define FRAME_POP ldp x29, x30, [sp],#16 + +#if INTERLEAVE == 2 + +aes_encrypt_block2x: + encrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block2x) + +aes_decrypt_block2x: + decrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block2x) + +#elif INTERLEAVE == 4 + +aes_encrypt_block4x: + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block4x) + +aes_decrypt_block4x: + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block4x) + +#else +#error INTERLEAVE should equal 2 or 4 +#endif + + .macro do_encrypt_block2x + bl aes_encrypt_block2x + .endm + + .macro do_decrypt_block2x + bl aes_decrypt_block2x + .endm + + .macro do_encrypt_block4x + bl aes_encrypt_block4x + .endm + + .macro do_decrypt_block4x + bl aes_decrypt_block4x + .endm + +#else +#define FRAME_PUSH +#define FRAME_POP + + .macro do_encrypt_block2x + encrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block2x + decrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_encrypt_block4x + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block4x + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + +#endif + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + */ + +AES_ENTRY(aes_ecb_encrypt) + FRAME_PUSH + cbz w5, .LecbencloopNx + + enc_prepare w3, x2, x5 + +.LecbencloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + do_encrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + do_encrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbencloopNx +.Lecbenc1x: + adds w4, w4, #INTERLEAVE + beq .Lecbencout +#endif +.Lecbencloop: + ld1 {v0.16b}, [x1], #16 /* get next pt block */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbencloop +.Lecbencout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_encrypt) + + +AES_ENTRY(aes_ecb_decrypt) + FRAME_PUSH + cbz w5, .LecbdecloopNx + + dec_prepare w3, x2, x5 + +.LecbdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + do_decrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + do_decrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbdecloopNx +.Lecbdec1x: + adds w4, w4, #INTERLEAVE + beq .Lecbdecout +#endif +.Lecbdecloop: + ld1 {v0.16b}, [x1], #16 /* get next ct block */ + decrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbdecloop +.Lecbdecout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_decrypt) + + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + */ + +AES_ENTRY(aes_cbc_encrypt) + cbz w6, .Lcbcencloop + + ld1 {v0.16b}, [x5] /* get iv */ + enc_prepare w3, x2, x5 + +.Lcbcencloop: + ld1 {v1.16b}, [x1], #16 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcencloop + ret +AES_ENDPROC(aes_cbc_encrypt) + + +AES_ENTRY(aes_cbc_decrypt) + FRAME_PUSH + cbz w6, .LcbcdecloopNx + + ld1 {v7.16b}, [x5] /* get iv */ + dec_prepare w3, x2, x5 + +.LcbcdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lcbcdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + mov v2.16b, v0.16b + mov v3.16b, v1.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v2.16b + mov v7.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + mov v4.16b, v0.16b + mov v5.16b, v1.16b + mov v6.16b, v2.16b + do_decrypt_block4x + sub x1, x1, #16 + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v4.16b + ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + eor v2.16b, v2.16b, v5.16b + eor v3.16b, v3.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LcbcdecloopNx +.Lcbcdec1x: + adds w4, w4, #INTERLEAVE + beq .Lcbcdecout +#endif +.Lcbcdecloop: + ld1 {v1.16b}, [x1], #16 /* get next ct block */ + mov v0.16b, v1.16b /* ...and copy to v0 */ + decrypt_block v0, w3, x2, x5, w6 + eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ + mov v7.16b, v1.16b /* ct is next iv */ + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + FRAME_POP + ret +AES_ENDPROC(aes_cbc_decrypt) + + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[], int first) + */ + +AES_ENTRY(aes_ctr_encrypt) + FRAME_PUSH + cbnz w6, .Lctrfirst /* 1st time around? */ + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrinc + add x5, x5, #1 /* increment BE ctr */ + b .LctrincNx +#else + b .Lctrinc +#endif +.Lctrfirst: + enc_prepare w3, x2, x6 + ld1 {v4.16b}, [x5] + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrloop +.LctrloopNx: + subs w4, w4, #INTERLEAVE + bmi .Lctr1x +#if INTERLEAVE == 2 + mov v0.8b, v4.8b + mov v1.8b, v4.8b + rev x7, x5 + add x5, x5, #1 + ins v0.d[1], x7 + rev x7, x5 + add x5, x5, #1 + ins v1.d[1], x7 + ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ + do_encrypt_block2x + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ + dup v7.4s, w5 + mov v0.16b, v4.16b + add v7.4s, v7.4s, v8.4s + mov v1.16b, v4.16b + rev32 v8.16b, v7.16b + mov v2.16b, v4.16b + mov v3.16b, v4.16b + mov v1.s[3], v8.s[0] + mov v2.s[3], v8.s[1] + mov v3.s[3], v8.s[2] + ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + do_encrypt_block4x + eor v0.16b, v5.16b, v0.16b + ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + eor v1.16b, v6.16b, v1.16b + eor v2.16b, v7.16b, v2.16b + eor v3.16b, v5.16b, v3.16b + st1 {v0.16b-v3.16b}, [x0], #64 + add x5, x5, #INTERLEAVE +#endif + cbz w4, .LctroutNx +.LctrincNx: + rev x7, x5 + ins v4.d[1], x7 + b .LctrloopNx +.LctroutNx: + sub x5, x5, #1 + rev x7, x5 + ins v4.d[1], x7 + b .Lctrout +.Lctr1x: + adds w4, w4, #INTERLEAVE + beq .Lctrout +#endif +.Lctrloop: + mov v0.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + subs w4, w4, #1 + bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + ld1 {v3.16b}, [x1], #16 + eor v3.16b, v0.16b, v3.16b + st1 {v3.16b}, [x0], #16 + beq .Lctrout +.Lctrinc: + adds x5, x5, #1 /* increment BE ctr */ + rev x7, x5 + ins v4.d[1], x7 + bcc .Lctrloop /* no overflow? */ + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrloop +.Lctrhalfblock: + ld1 {v3.8b}, [x1] + eor v3.8b, v0.8b, v3.8b + st1 {v3.8b}, [x0] +.Lctrout: + FRAME_POP + ret +AES_ENDPROC(aes_ctr_encrypt) + .ltorg + + + /* + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + */ + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + +.Lxts_mul_x: + .word 1, 0, 0x87, 0 + +AES_ENTRY(aes_xts_encrypt) + FRAME_PUSH + cbz w7, .LxtsencloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + enc_switch_key w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsencNx + +.LxtsencloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsencNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_encrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsencoutNx + next_tweak v4, v5, v7, v8 + b .LxtsencNx +.LxtsencoutNx: + mov v4.16b, v5.16b + b .Lxtsencout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_encrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsencout + b .LxtsencloopNx +#endif +.Lxtsenc1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsencout +#endif +.Lxtsencloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsencout + next_tweak v4, v4, v7, v8 + b .Lxtsencloop +.Lxtsencout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_encrypt) + + +AES_ENTRY(aes_xts_decrypt) + FRAME_PUSH + cbz w7, .LxtsdecloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + dec_prepare w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsdecNx + +.LxtsdecloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsdecNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsdecoutNx + next_tweak v4, v5, v7, v8 + b .LxtsdecNx +.LxtsdecoutNx: + mov v4.16b, v5.16b + b .Lxtsdecout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_decrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsdecout + b .LxtsdecloopNx +#endif +.Lxtsdec1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsdecout +#endif +.Lxtsdecloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + decrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsdecout + next_tweak v4, v4, v7, v8 + b .Lxtsdecloop +.Lxtsdecout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_decrypt) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S new file mode 100644 index 000000000000..b93170e1cc93 --- /dev/null +++ b/arch/arm64/crypto/aes-neon.S @@ -0,0 +1,382 @@ +/* + * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define AES_ENTRY(func) ENTRY(neon_ ## func) +#define AES_ENDPROC(func) ENDPROC(neon_ ## func) + + /* multiply by polynomial 'x' in GF(2^8) */ + .macro mul_by_x, out, in, temp, const + sshr \temp, \in, #7 + add \out, \in, \in + and \temp, \temp, \const + eor \out, \out, \temp + .endm + + /* preload the entire Sbox */ + .macro prepare, sbox, shiftrows, temp + adr \temp, \sbox + movi v12.16b, #0x40 + ldr q13, \shiftrows + movi v14.16b, #0x1b + ld1 {v16.16b-v19.16b}, [\temp], #64 + ld1 {v20.16b-v23.16b}, [\temp], #64 + ld1 {v24.16b-v27.16b}, [\temp], #64 + ld1 {v28.16b-v31.16b}, [\temp] + .endm + + /* do preload for encryption */ + .macro enc_prepare, ignore0, ignore1, temp + prepare .LForward_Sbox, .LForward_ShiftRows, \temp + .endm + + .macro enc_switch_key, ignore0, ignore1, temp + /* do nothing */ + .endm + + /* do preload for decryption */ + .macro dec_prepare, ignore0, ignore1, temp + prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + .endm + + /* apply SubBytes transformation using the the preloaded Sbox */ + .macro sub_bytes, in + sub v9.16b, \in\().16b, v12.16b + tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b + sub v10.16b, v9.16b, v12.16b + tbx \in\().16b, {v20.16b-v23.16b}, v9.16b + sub v11.16b, v10.16b, v12.16b + tbx \in\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + /* apply MixColumns transformation */ + .macro mix_columns, in + mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b + rev32 v8.8h, \in\().8h + eor \in\().16b, v10.16b, \in\().16b + shl v9.4s, v8.4s, #24 + shl v11.4s, \in\().4s, #24 + sri v9.4s, v8.4s, #8 + sri v11.4s, \in\().4s, #8 + eor v9.16b, v9.16b, v8.16b + eor v10.16b, v10.16b, v9.16b + eor \in\().16b, v10.16b, v11.16b + .endm + + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + .macro inv_mix_columns, in + mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b + mul_by_x v11.16b, v11.16b, v10.16b, v14.16b + eor \in\().16b, \in\().16b, v11.16b + rev32 v11.8h, v11.8h + eor \in\().16b, \in\().16b, v11.16b + mix_columns \in + .endm + + .macro do_block, enc, in, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ + sub_bytes \in + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns \in + .else + inv_mix_columns \in + .endif + b 1111b +2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block, in, rounds, rk, rkp, i + do_block 1, \in, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block, in, rounds, rk, rkp, i + do_block 0, \in, \rounds, \rk, \rkp, \i + .endm + + /* + * Interleaved versions: functionally equivalent to the + * ones above, but applied to 2 or 4 AES states in parallel. + */ + + .macro sub_bytes_2x, in0, in1 + sub v8.16b, \in0\().16b, v12.16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, v8.16b, v12.16b + sub v11.16b, v9.16b, v12.16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v10.16b, v12.16b + sub v9.16b, v11.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + .endm + + .macro sub_bytes_4x, in0, in1, in2, in3 + sub v8.16b, \in0\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, \in2\().16b, v12.16b + tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b + sub v11.16b, \in3\().16b, v12.16b + tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b + tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const + sshr \tmp0\().16b, \in0\().16b, #7 + add \out0\().16b, \in0\().16b, \in0\().16b + sshr \tmp1\().16b, \in1\().16b, #7 + and \tmp0\().16b, \tmp0\().16b, \const\().16b + add \out1\().16b, \in1\().16b, \in1\().16b + and \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b + .endm + + .macro mix_columns_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + rev32 v10.8h, \in0\().8h + rev32 v11.8h, \in1\().8h + eor \in0\().16b, v8.16b, \in0\().16b + eor \in1\().16b, v9.16b, \in1\().16b + shl v12.4s, v10.4s, #24 + shl v13.4s, v11.4s, #24 + eor v8.16b, v8.16b, v10.16b + sri v12.4s, v10.4s, #8 + shl v10.4s, \in0\().4s, #24 + eor v9.16b, v9.16b, v11.16b + sri v13.4s, v11.4s, #8 + shl v11.4s, \in1\().4s, #24 + sri v10.4s, \in0\().4s, #8 + eor \in0\().16b, v8.16b, v12.16b + sri v11.4s, \in1\().4s, #8 + eor \in1\().16b, v9.16b, v13.16b + eor \in0\().16b, v10.16b, \in0\().16b + eor \in1\().16b, v11.16b, \in1\().16b + .endm + + .macro inv_mix_cols_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + mix_columns_2x \in0, \in1 + .endm + + .macro inv_mix_cols_4x, in0, in1, in2, in3 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 + mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 + mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + rev32 v10.8h, v10.8h + rev32 v11.8h, v11.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + .endm + + .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + sub_bytes_2x \in0, \in1 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_2x \in0, \in1 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + .endm + + .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + sub_bytes_4x \in0, \in1, \in2, \in3 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ + tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_4x \in0, \in1, \in2, \in3 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + +#include "aes-modes.S" + + .text + .align 4 +.LForward_ShiftRows: + .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 + .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb + +.LReverse_ShiftRows: + .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb + .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 + +.LForward_Sbox: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.LReverse_Sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d From 0680e92ef3a88e4168aee4cf1b95130373f25b25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Jun 2014 11:02:15 +0100 Subject: [PATCH 58/82] arm64/crypto: fix data corruption bug in GHASH algorithm This fixes a bug in the GHASH algorithm resulting in the calculated hash to be incorrect if the input is presented in chunks whose size is not a multiple of 16 bytes. Signed-off-by: Ard Biesheuvel Fixes: fdd2389457b2 ("arm64/crypto: GHASH secure hash using ARMv8 Crypto Extensions") Signed-off-by: Catalin Marinas --- arch/arm64/crypto/ghash-ce-glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index b92baf3f68c7..ef6aa69c4e0c 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -72,6 +72,7 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, partial ? ctx->buf : NULL); kernel_neon_end(); src += blocks * GHASH_BLOCK_SIZE; + partial = 0; } if (len) memcpy(ctx->buf + partial, src, len); From 945ce0a7bdc63b712f332848c51d5505841d50e0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 16 Jun 2014 11:02:16 +0100 Subject: [PATCH 59/82] arm64/crypto: improve performance of GHASH algorithm This patches modifies the GHASH secure hash implementation to switch to a faster, polynomial multiplication based reduction instead of one that uses shifts and rotates. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/crypto/ghash-ce-core.S | 96 +++++++++++++------------------ arch/arm64/crypto/ghash-ce-glue.c | 4 +- 2 files changed, 42 insertions(+), 58 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index b9e6eaf41c9b..dc457015884e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -3,14 +3,6 @@ * * Copyright (C) 2014 Linaro Ltd. * - * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S - * - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying - * Vinodh Gopal - * Erdinc Ozturk - * Deniz Karakoyunlu - * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published * by the Free Software Foundation. @@ -19,13 +11,15 @@ #include #include - DATA .req v0 - SHASH .req v1 - IN1 .req v2 + SHASH .req v0 + SHASH2 .req v1 T1 .req v2 T2 .req v3 - T3 .req v4 - VZR .req v5 + MASK .req v4 + XL .req v5 + XM .req v6 + XH .req v7 + IN1 .req v7 .text .arch armv8-a+crypto @@ -35,61 +29,51 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {DATA.16b}, [x1] ld1 {SHASH.16b}, [x3] - eor VZR.16b, VZR.16b, VZR.16b + ld1 {XL.16b}, [x1] + movi MASK.16b, #0xe1 + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 + shl MASK.2d, MASK.2d, #57 + eor SHASH2.16b, SHASH2.16b, SHASH.16b /* do the head block first, if supplied */ cbz x4, 0f - ld1 {IN1.2d}, [x4] + ld1 {T1.2d}, [x4] b 1f -0: ld1 {IN1.2d}, [x2], #16 +0: ld1 {T1.2d}, [x2], #16 sub w0, w0, #1 -1: ext IN1.16b, IN1.16b, IN1.16b, #8 -CPU_LE( rev64 IN1.16b, IN1.16b ) - eor DATA.16b, DATA.16b, IN1.16b - /* multiply DATA by SHASH in GF(2^128) */ - ext T2.16b, DATA.16b, DATA.16b, #8 - ext T3.16b, SHASH.16b, SHASH.16b, #8 - eor T2.16b, T2.16b, DATA.16b - eor T3.16b, T3.16b, SHASH.16b +1: /* multiply XL by SHASH in GF(2^128) */ +CPU_LE( rev64 T1.16b, T1.16b ) - pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 - pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 - pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) - eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) - eor T2.16b, T2.16b, DATA.16b - - ext T3.16b, VZR.16b, T2.16b, #8 - ext T2.16b, T2.16b, VZR.16b, #8 - eor DATA.16b, DATA.16b, T3.16b - eor T1.16b, T1.16b, T2.16b // is result of - // carry-less multiplication - - /* first phase of the reduction */ - shl T3.2d, DATA.2d, #1 - eor T3.16b, T3.16b, DATA.16b - shl T3.2d, T3.2d, #5 - eor T3.16b, T3.16b, DATA.16b - shl T3.2d, T3.2d, #57 - ext T2.16b, VZR.16b, T3.16b, #8 - ext T3.16b, T3.16b, VZR.16b, #8 - eor DATA.16b, DATA.16b, T2.16b - eor T1.16b, T1.16b, T3.16b - - /* second phase of the reduction */ - ushr T2.2d, DATA.2d, #5 - eor T2.16b, T2.16b, DATA.16b - ushr T2.2d, T2.2d, #1 - eor T2.16b, T2.16b, DATA.16b - ushr T2.2d, T2.2d, #1 + ext T2.16b, XL.16b, XL.16b, #8 + ext IN1.16b, T1.16b, T1.16b, #8 eor T1.16b, T1.16b, T2.16b - eor DATA.16b, DATA.16b, T1.16b + eor XL.16b, XL.16b, IN1.16b + + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 + eor T1.16b, T1.16b, XL.16b + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 + pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) + + ext T1.16b, XL.16b, XH.16b, #8 + eor T2.16b, XL.16b, XH.16b + eor XM.16b, XM.16b, T1.16b + eor XM.16b, XM.16b, T2.16b + pmull T2.1q, XL.1d, MASK.1d + + mov XH.d[0], XM.d[1] + mov XM.d[1], XL.d[0] + + eor XL.16b, XM.16b, T2.16b + ext T2.16b, XL.16b, XL.16b, #8 + pmull XL.1q, XL.1d, MASK.1d + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b cbnz w0, 0b - st1 {DATA.16b}, [x1] + st1 {XL.16b}, [x1] ret ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index ef6aa69c4e0c..833ec1e3f3e9 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -67,7 +67,7 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, blocks = len / GHASH_BLOCK_SIZE; len %= GHASH_BLOCK_SIZE; - kernel_neon_begin_partial(6); + kernel_neon_begin_partial(8); pmull_ghash_update(blocks, ctx->digest, src, key, partial ? ctx->buf : NULL); kernel_neon_end(); @@ -89,7 +89,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); - kernel_neon_begin_partial(6); + kernel_neon_begin_partial(8); pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); kernel_neon_end(); } From 181dbc7cc2ba7135f08aa8647b7817cc37b8ffa7 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 24 Jul 2014 17:03:26 +0100 Subject: [PATCH 60/82] arm64/crypto: fix makefile rule for aes-glue-%.o This fixes the following build failure when building with CONFIG_MODVERSIONS enabled: CC [M] arch/arm64/crypto/aes-glue-ce.o ld: cannot find arch/arm64/crypto/aes-glue-ce.o: No such file or directory make[1]: *** [arch/arm64/crypto/aes-ce-blk.o] Error 1 make: *** [arch/arm64/crypto] Error 2 The $(obj)/aes-glue-%.o rule only creates $(obj)/.tmp_aes-glue-ce.o, it should use if_changed_rule instead of if_changed_dep. Signed-off-by: Andreas Schwab [ardb: mention CONFIG_MODVERSIONS in commit log] Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 2070a56ecc46..a3f935fde975 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -35,4 +35,4 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE - $(call if_changed_dep,cc_o_c) + $(call if_changed_rule,cc_o_c) From be64e76c02d51b5323f228da3575b92ae03e25ce Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 25 Jul 2014 19:40:20 -0400 Subject: [PATCH 61/82] crypto: arm64-aes - fix encryption of unaligned data cryptsetup fails on arm64 when using kernel encryption via AF_ALG socket. See https://bugzilla.redhat.com/show_bug.cgi?id=1122937 The bug is caused by incorrect handling of unaligned data in arch/arm64/crypto/aes-glue.c. Cryptsetup creates a buffer that is aligned on 8 bytes, but not on 16 bytes. It opens AF_ALG socket and uses the socket to encrypt data in the buffer. The arm64 crypto accelerator causes data corruption or crashes in the scatterwalk_pagedone. This patch fixes the bug by passing the residue bytes that were not processed as the last parameter to blkcipher_walk_done. Signed-off-by: Mikulas Patocka Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 60f2f4c12256..79cd911ef88c 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); From 2f5acd7615cca9aa638cb54d69706bb4ac21fa94 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 20 Sep 2013 09:55:40 +0200 Subject: [PATCH 62/82] crypto: create generic version of ablk_helper Create a generic version of ablk_helper so it can be reused by other architectures. Acked-by: Jussi Kivilinna Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/Kconfig | 4 + crypto/Makefile | 1 + crypto/ablk_helper.c | 150 +++++++++++++++++++++++++++++++++++ include/asm-generic/simd.h | 14 ++++ include/crypto/ablk_helper.h | 31 ++++++++ 5 files changed, 200 insertions(+) create mode 100644 crypto/ablk_helper.c create mode 100644 include/asm-generic/simd.h create mode 100644 include/crypto/ablk_helper.h diff --git a/crypto/Kconfig b/crypto/Kconfig index bf8148e74e73..a1eba1845367 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -179,6 +179,10 @@ config CRYPTO_ABLK_HELPER_X86 depends on X86 select CRYPTO_CRYPTD +config CRYPTO_ABLK_HELPER + tristate + select CRYPTO_CRYPTD + config CRYPTO_GLUE_HELPER_X86 tristate depends on X86 diff --git a/crypto/Makefile b/crypto/Makefile index a8e9b0fefbe9..5d0b869b173f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -101,3 +101,4 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o obj-$(CONFIG_XOR_BLOCKS) += xor.o obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ +obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c new file mode 100644 index 000000000000..62568b1fc885 --- /dev/null +++ b/crypto/ablk_helper.c @@ -0,0 +1,150 @@ +/* + * Shared async block cipher helpers + * + * Copyright (c) 2012 Jussi Kivilinna + * + * Based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} +EXPORT_SYMBOL_GPL(ablk_set_key); + +int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} +EXPORT_SYMBOL_GPL(__ablk_encrypt); + +int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!may_use_simd()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} +EXPORT_SYMBOL_GPL(ablk_encrypt); + +int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!may_use_simd()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} +EXPORT_SYMBOL_GPL(ablk_decrypt); + +void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} +EXPORT_SYMBOL_GPL(ablk_exit); + +int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) +{ + struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} +EXPORT_SYMBOL_GPL(ablk_init_common); + +int ablk_init(struct crypto_tfm *tfm) +{ + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + return ablk_init_common(tfm, drv_name); +} +EXPORT_SYMBOL_GPL(ablk_init); + +MODULE_LICENSE("GPL"); diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h new file mode 100644 index 000000000000..f57eb7b5c23b --- /dev/null +++ b/include/asm-generic/simd.h @@ -0,0 +1,14 @@ + +#include + +/* + * may_use_simd - whether it is allowable at this time to issue SIMD + * instructions or access the SIMD register file + * + * As architectures typically don't preserve the SIMD register file when + * taking an interrupt, !in_interrupt() should be a reasonable default. + */ +static __must_check inline bool may_use_simd(void) +{ + return !in_interrupt(); +} diff --git a/include/crypto/ablk_helper.h b/include/crypto/ablk_helper.h new file mode 100644 index 000000000000..4f93df50c23e --- /dev/null +++ b/include/crypto/ablk_helper.h @@ -0,0 +1,31 @@ +/* + * Shared async block cipher helpers + */ + +#ifndef _CRYPTO_ABLK_HELPER_H +#define _CRYPTO_ABLK_HELPER_H + +#include +#include +#include + +struct async_helper_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len); + +extern int __ablk_encrypt(struct ablkcipher_request *req); + +extern int ablk_encrypt(struct ablkcipher_request *req); + +extern int ablk_decrypt(struct ablkcipher_request *req); + +extern void ablk_exit(struct crypto_tfm *tfm); + +extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); + +extern int ablk_init(struct crypto_tfm *tfm); + +#endif /* _CRYPTO_ABLK_HELPER_H */ From 59472af5848e3063c149baa92a35882ce3f6f291 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 24 Sep 2013 08:21:29 +0800 Subject: [PATCH 63/82] crypto: ablk_helper - Replace memcpy with struct assignment tree: git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master head: 48e6dc1b2a1ad8186d48968d5018912bdacac744 commit: a62b01cd6cc1feb5e80d64d6937c291473ed82cb [20/24] crypto: create generic version of ablk_helper coccinelle warnings: (new ones prefixed by >>) >> crypto/ablk_helper.c:97:2-8: Replace memcpy with struct assignment >> crypto/ablk_helper.c:78:2-8: Replace memcpy with struct assignment Please consider folding the attached diff :-) Signed-off-by: Herbert Xu --- crypto/ablk_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c index 62568b1fc885..ffe7278d4bd8 100644 --- a/crypto/ablk_helper.c +++ b/crypto/ablk_helper.c @@ -75,7 +75,7 @@ int ablk_encrypt(struct ablkcipher_request *req) struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); + *cryptd_req = *req; ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); return crypto_ablkcipher_encrypt(cryptd_req); @@ -94,7 +94,7 @@ int ablk_decrypt(struct ablkcipher_request *req) struct ablkcipher_request *cryptd_req = ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); + *cryptd_req = *req; ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); return crypto_ablkcipher_decrypt(cryptd_req); From 197028d295db718f4e5cf8af9038cfeb4823b4ef Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 8 Feb 2014 13:34:09 +0100 Subject: [PATCH 64/82] cpu: add generic support for CPU feature based module autoloading This patch adds support for advertising optional CPU features over udev using the modalias, and for declaring compatibility with/dependency upon such a feature in a module. The mapping between feature numbers and actual features should be provided by the architecture in a file called which exports the following functions/macros: - cpu_feature(FEAT), a preprocessor macro that maps token FEAT to a numeric index; - bool cpu_have_feature(n), returning whether this CPU has support for feature #n; - MAX_CPU_FEATURES, an upper bound for 'n' in the previous function. The feature can then be enabled by setting CONFIG_GENERIC_CPU_AUTOPROBE for the architecture. For instance, a module that registers its module init function using module_cpu_feature_match(FEAT_X, module_init_function) will be probed automatically when the CPU's support for the 'FEAT_X' feature is advertised over udev, and will only allow the module to be loaded by hand if the 'FEAT_X' feature is supported. Change-Id: Icae8e3ff347235fc72a5b41279f0afdb34fb161a Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 8 +++++ drivers/base/cpu.c | 48 ++++++++++++++++++++++--- include/linux/cpufeature.h | 60 +++++++++++++++++++++++++++++++ include/linux/mod_devicetable.h | 9 +++++ scripts/mod/devicetable-offsets.c | 3 ++ scripts/mod/file2alias.c | 10 ++++++ 6 files changed, 134 insertions(+), 4 deletions(-) create mode 100644 include/linux/cpufeature.h diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 07abd9d76f7f..9634800e800f 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -187,6 +187,14 @@ config GENERIC_CPU_DEVICES bool default n +config HAVE_CPU_AUTOPROBE + def_bool ARCH_HAS_CPU_AUTOPROBE + +config GENERIC_CPU_AUTOPROBE + bool + depends on !ARCH_HAS_CPU_AUTOPROBE + select HAVE_CPU_AUTOPROBE + config SOC_BUS bool diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3d48fc887ef4..b5668501e3d3 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "base.h" @@ -260,6 +261,45 @@ static void cpu_device_release(struct device *dev) */ } +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE +static ssize_t print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t n; + u32 i; + + n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:", + CPU_FEATURE_TYPEVAL); + + for (i = 0; i < MAX_CPU_FEATURES; i++) + if (cpu_have_feature(i)) { + if (PAGE_SIZE < n + sizeof(",XXXX\n")) { + WARN(1, "CPU features overflow page\n"); + break; + } + n += sprintf(&buf[n], ",%04X", i); + } + buf[n++] = '\n'; + return n; +} +#else +#define print_cpu_modalias arch_print_cpu_modalias +#endif + +static int cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} +#endif + /* * register_cpu - Setup a sysfs device for a CPU. * @cpu - cpu->hotpluggable field set to 1 will generate a control file in @@ -278,7 +318,7 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; #ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE - cpu->dev.bus->uevent = arch_cpu_uevent; + cpu->dev.bus->uevent = cpu_uevent; #endif error = device_register(&cpu->dev); if (!error && cpu->hotpluggable) @@ -307,8 +347,8 @@ struct device *get_cpu_device(unsigned cpu) } EXPORT_SYMBOL_GPL(get_cpu_device); -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE -static DEVICE_ATTR(modalias, 0444, arch_print_cpu_modalias, NULL); +#ifdef CONFIG_HAVE_CPU_AUTOPROBE +static DEVICE_ATTR(modalias, 0444, print_cpu_modalias, NULL); #endif static struct attribute *cpu_root_attrs[] = { @@ -321,7 +361,7 @@ static struct attribute *cpu_root_attrs[] = { &cpu_attrs[2].attr.attr, &dev_attr_kernel_max.attr, &dev_attr_offline.attr, -#ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE +#ifdef CONFIG_HAVE_CPU_AUTOPROBE &dev_attr_modalias.attr, #endif NULL diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h new file mode 100644 index 000000000000..c4d4eb8ac9fe --- /dev/null +++ b/include/linux/cpufeature.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_CPUFEATURE_H +#define __LINUX_CPUFEATURE_H + +#ifdef CONFIG_GENERIC_CPU_AUTOPROBE + +#include +#include + +/* + * Macros imported from : + * - cpu_feature(x) ordinal value of feature called 'x' + * - cpu_have_feature(u32 n) whether feature #n is available + * - MAX_CPU_FEATURES upper bound for feature ordinal values + * Optional: + * - CPU_FEATURE_TYPEFMT format string fragment for printing the cpu type + * - CPU_FEATURE_TYPEVAL set of values matching the format string above + */ + +#ifndef CPU_FEATURE_TYPEFMT +#define CPU_FEATURE_TYPEFMT "%s" +#endif + +#ifndef CPU_FEATURE_TYPEVAL +#define CPU_FEATURE_TYPEVAL ELF_PLATFORM +#endif + +/* + * Use module_cpu_feature_match(feature, module_init_function) to + * declare that + * a) the module shall be probed upon discovery of CPU feature 'feature' + * (typically at boot time using udev) + * b) the module must not be loaded if CPU feature 'feature' is not present + * (not even by manual insmod). + * + * For a list of legal values for 'feature', please consult the file + * 'asm/cpufeature.h' of your favorite architecture. + */ +#define module_cpu_feature_match(x, __init) \ +static struct cpu_feature const cpu_feature_match_ ## x[] = \ + { { .feature = cpu_feature(x) }, { } }; \ +MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ + \ +static int cpu_feature_match_ ## x ## _init(void) \ +{ \ + if (!cpu_have_feature(cpu_feature(x))) \ + return -ENODEV; \ + return __init(); \ +} \ +module_init(cpu_feature_match_ ## x ## _init) + +#endif +#endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b508016fb76d..e2c55f297d0b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -561,6 +561,15 @@ struct x86_cpu_id { #define X86_MODEL_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +/* + * Generic table type for matching CPU features. + * @feature: the bit number of the feature (0 - 65535) + */ + +struct cpu_feature { + __u16 feature; +}; + #define IPACK_ANY_FORMAT 0xff #define IPACK_ANY_ID (~0) struct ipack_device_id { diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e66d4d258e1a..5e74595f2e85 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -174,6 +174,9 @@ int main(void) DEVID_FIELD(x86_cpu_id, model); DEVID_FIELD(x86_cpu_id, vendor); + DEVID(cpu_feature); + DEVID_FIELD(cpu_feature, feature); + DEVID(mei_cl_device_id); DEVID_FIELD(mei_cl_device_id, name); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 45f9a3377dcd..8a9612f17cb0 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1133,6 +1133,16 @@ static int do_x86cpu_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry); +/* LOOKS like cpu:type:*:feature:*FEAT* */ +static int do_cpu_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, cpu_feature, feature); + + sprintf(alias, "cpu:type:*:feature:*%04X*", feature); + return 1; +} +ADD_TO_DEVTABLE("cpu", cpu_feature, do_cpu_entry); + /* Looks like: mei:S */ static int do_mei_entry(const char *filename, void *symval, char *alias) From f11a3d1bc1528e0a01dde578c2ab88c70b3fbeff Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Feb 2014 15:26:27 +0100 Subject: [PATCH 65/82] arm64: add abstractions for FPSIMD state manipulation There are two tacit assumptions in the FPSIMD handling code that will no longer hold after the next patch that optimizes away some FPSIMD state restores: . the FPSIMD registers of this CPU contain the userland FPSIMD state of task 'current'; . when switching to a task, its FPSIMD state will always be restored from memory. This patch adds the following functions to abstract away from straight FPSIMD register file saves and restores: - fpsimd_preserve_current_state -> ensure current's FPSIMD state is saved - fpsimd_update_current_state -> replace current's FPSIMD state Where necessary, the signal handling and fork code are updated to use the above wrappers instead of poking into the FPSIMD registers directly. Signed-off-by: Ard Biesheuvel Conflicts: arch/arm64/kernel/fpsimd.c Change-Id: I53ae7082427cb1c5cc32e1f2ddbd4218115601ba --- arch/arm64/include/asm/fpsimd.h | 3 ++ arch/arm64/kernel/fpsimd.c | 80 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/process.c | 2 +- arch/arm64/kernel/signal.c | 9 ++-- arch/arm64/kernel/signal32.c | 9 ++-- 5 files changed, 90 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index c43b4ac13008..f4e524b67e91 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -58,6 +58,9 @@ extern void fpsimd_load_state(struct fpsimd_state *state); extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); +extern void fpsimd_preserve_current_state(void); +extern void fpsimd_update_current_state(struct fpsimd_state *state); + #endif #endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e8b8357aedb4..135cfb88d2aa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -83,6 +83,86 @@ void fpsimd_flush_thread(void) fpsimd_load_state(¤t->thread.fpsimd_state); } +/* + * Save the userland FPSIMD state of 'current' to memory + */ +void fpsimd_preserve_current_state(void) +{ + preempt_disable(); + fpsimd_save_state(¤t->thread.fpsimd_state); + preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ + preempt_disable(); + fpsimd_load_state(state); + preempt_enable(); +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin(void) +{ + /* Avoid using the NEON in interrupt context */ + BUG_ON(in_interrupt()); + preempt_disable(); + + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); +} +EXPORT_SYMBOL(kernel_neon_begin); + +void kernel_neon_end(void) +{ + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + + preempt_enable(); +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + /* * FP/SIMD support code initialisation. */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..e2eb9453d3a1 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -184,7 +184,7 @@ void release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_save_state(¤t->thread.fpsimd_state); + fpsimd_preserve_current_state(); *dst = *src; return 0; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75dd..06448a77ff53 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) int err; /* dump the hardware registers to the fpsimd_state structure */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); @@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); /* load the hardware registers from the fpsimd_state structure */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..31fbeaf4dd62 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -247,7 +247,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -310,11 +310,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } From 74780f86ec6464edee0a87309da342f6237a6138 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 May 2014 11:20:23 +0200 Subject: [PATCH 66/82] arm64: defer reloading a task's FPSIMD state to userland resume If a task gets scheduled out and back in again and nothing has touched its FPSIMD state in the mean time, there is really no reason to reload it from memory. Similarly, repeated calls to kernel_neon_begin() and kernel_neon_end() will preserve and restore the FPSIMD state every time. This patch defers the FPSIMD state restore to the last possible moment, i.e., right before the task returns to userland. If a task does not return to userland at all (for any reason), the existing FPSIMD state is preserved and may be reused by the owning task if it gets scheduled in again on the same CPU. This patch adds two more functions to abstract away from straight FPSIMD register file saves and restores: - fpsimd_restore_current_state -> ensure current's FPSIMD state is loaded - fpsimd_flush_task_state -> invalidate live copies of a task's FPSIMD state Signed-off-by: Ard Biesheuvel Conflicts: arch/arm64/kernel/fpsimd.c Change-Id: Ib1c0d8d0afb3c248cd4d060eb35877530dd92fdc --- arch/arm64/include/asm/fpsimd.h | 5 + arch/arm64/include/asm/thread_info.h | 4 +- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/fpsimd.c | 142 ++++++++++++++++++++++++--- arch/arm64/kernel/ptrace.c | 2 + arch/arm64/kernel/signal.c | 4 + 6 files changed, 143 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index f4e524b67e91..7a900142dbc8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,6 +37,8 @@ struct fpsimd_state { u32 fpcr; }; }; + /* the id of the last cpu to have restored this state */ + unsigned int cpu; }; #if defined(__KERNEL__) && defined(CONFIG_COMPAT) @@ -59,8 +61,11 @@ extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); extern void fpsimd_preserve_current_state(void); +extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct fpsimd_state *state); +extern void fpsimd_flush_task_state(struct task_struct *target); + #endif #endif diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 3659e460071d..5e95a6ce074a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -118,10 +119,11 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1146e6f40a6b..0b65510230bb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -587,7 +587,7 @@ fast_work_pending: str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' tst x2, #PSR_MODE_MASK // user mode regs? diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 135cfb88d2aa..52eb7d7782fe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -32,6 +32,60 @@ #define FPEXC_IXF (1 << 4) #define FPEXC_IDF (1 << 7) +/* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + * the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + * been loaded into its FPSIMD registers most recently, or whether it has + * been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + * cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + * userland FPSIMD state is copied from memory to the registers, the task's + * fpsimd_state.cpu field is set to the id of the current CPU, the current + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + * TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + * restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + * register contents to memory, clears the fpsimd_last_state per-cpu variable + * and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + * whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + /* * Trapped FP/ASIMD access. */ @@ -70,39 +124,96 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { - /* check if not kernel threads */ - if (current->mm) + /* + * Save the current FPSIMD state to memory, but only if whatever is in + * the registers is in fact the most recent userland FPSIMD state of + * 'current'. + */ + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - if (next->mm) - fpsimd_load_state(&next->thread.fpsimd_state); + + if (next->mm) { + /* + * If we are switching to a task whose most recent userland + * FPSIMD state is already in the registers of *this* cpu, + * we can skip loading the state from memory. Otherwise, set + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded + * upon the next return to userland. + */ + struct fpsimd_state *st = &next->thread.fpsimd_state; + + if (__this_cpu_read(fpsimd_last_state) == st + && st->cpu == smp_processor_id()) + clear_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + else + set_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + } } void fpsimd_flush_thread(void) { memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); } /* - * Save the userland FPSIMD state of 'current' to memory + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' */ void fpsimd_preserve_current_state(void) { preempt_disable(); - fpsimd_save_state(¤t->thread.fpsimd_state); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); preempt_enable(); } /* - * Load an updated userland FPSIMD state for 'current' from memory + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + fpsimd_load_state(st); + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' */ void fpsimd_update_current_state(struct fpsimd_state *state) { preempt_disable(); fpsimd_load_state(state); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } preempt_enable(); } +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ + t->thread.fpsimd_state.cpu = NR_CPUS; +} + #ifdef CONFIG_KERNEL_MODE_NEON /* @@ -114,16 +225,19 @@ void kernel_neon_begin(void) BUG_ON(in_interrupt()); preempt_disable(); - if (current->mm) + /* + * Save the userland FPSIMD state if we have one and if we haven't done + * so already. Clear fpsimd_last_state to indicate that there is no + * longer userland FPSIMD state in the registers. + */ + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); } EXPORT_SYMBOL(kernel_neon_begin); void kernel_neon_end(void) { - if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); - preempt_enable(); } EXPORT_SYMBOL(kernel_neon_end); @@ -136,12 +250,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - if (current->mm) + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); break; case CPU_PM_EXIT: if (current->mm) - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); break; case CPU_PM_ENTER_FAILED: default: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831c..aebfc1519e8e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -541,6 +541,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return ret; target->thread.fpsimd_state.user_fpsimd = newstate; + fpsimd_flush_task_state(target); return ret; } @@ -790,6 +791,7 @@ static int compat_vfp_set(struct task_struct *target, uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; } + fpsimd_flush_task_state(target); return ret; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 06448a77ff53..882f01774365 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -413,4 +413,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } From 9ed72656162f38017af94acf2e986fdb04fecd05 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Feb 2014 15:26:29 +0100 Subject: [PATCH 67/82] arm64: add support for kernel mode NEON in interrupt context This patch modifies kernel_neon_begin() and kernel_neon_end(), so they may be called from any context. To address the case where only a couple of registers are needed, kernel_neon_begin_partial(u32) is introduced which takes as a parameter the number of bottom 'n' NEON q-registers required. To mark the end of such a partial section, the regular kernel_neon_end() should be used. Signed-off-by: Ard Biesheuvel Conflicts: arch/arm64/include/asm/neon.h Change-Id: Ifc7c6aa77e2ab8dd98bb9975cccab54e09693ab7 --- arch/arm64/include/asm/fpsimd.h | 15 +++++++++ arch/arm64/include/asm/fpsimdmacros.h | 35 +++++++++++++++++++++ arch/arm64/include/asm/neon.h | 18 +++++++++++ arch/arm64/kernel/entry-fpsimd.S | 24 +++++++++++++++ arch/arm64/kernel/fpsimd.c | 44 ++++++++++++++++++--------- 5 files changed, 122 insertions(+), 14 deletions(-) create mode 100644 arch/arm64/include/asm/neon.h diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 7a900142dbc8..50f559f574fe 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -41,6 +41,17 @@ struct fpsimd_state { unsigned int cpu; }; +/* + * Struct for stacking the bottom 'n' FP/SIMD registers. + */ +struct fpsimd_partial_state { + u32 fpsr; + u32 fpcr; + u32 num_regs; + __uint128_t vregs[32]; +}; + + #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f @@ -66,6 +77,10 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state); extern void fpsimd_flush_task_state(struct task_struct *target); +extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state, + u32 num_regs); +extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state); + #endif #endif diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index bbec599c96bd..768414d55e64 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -62,3 +62,38 @@ ldr w\tmpnr, [\state, #16 * 2 + 4] msr fpcr, x\tmpnr .endm + +.altmacro +.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 + mrs x\tmpnr1, fpsr + str w\numnr, [\state, #8] + mrs x\tmpnr2, fpcr + stp w\tmpnr1, w\tmpnr2, [\state] + adr x\tmpnr1, 0f + add \state, \state, x\numnr, lsl #4 + sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 + br x\tmpnr1 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 + .irp qb, %(qa + 1) + stp q\qa, q\qb, [\state, # -16 * \qa - 16] + .endr + .endr +0: +.endm + +.macro fpsimd_restore_partial state, tmpnr1, tmpnr2 + ldp w\tmpnr1, w\tmpnr2, [\state] + msr fpsr, x\tmpnr1 + msr fpcr, x\tmpnr2 + adr x\tmpnr1, 0f + ldr w\tmpnr2, [\state, #8] + add \state, \state, x\tmpnr2, lsl #4 + sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 + br x\tmpnr1 + .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 + .irp qb, %(qa + 1) + ldp q\qa, q\qb, [\state, # -16 * \qa - 16] + .endr + .endr +0: +.endm diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h new file mode 100644 index 000000000000..13ce4cc18e26 --- /dev/null +++ b/arch/arm64/include/asm/neon.h @@ -0,0 +1,18 @@ +/* + * linux/arch/arm64/include/asm/neon.h + * + * Copyright (C) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#define cpu_has_neon() (1) + +#define kernel_neon_begin() kernel_neon_begin_partial(32) + +void kernel_neon_begin_partial(u32 num_regs); +void kernel_neon_end(void); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa6..d358ccacfc00 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) + fpsimd_save_partial x0, 1, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) + fpsimd_restore_partial x0, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 52eb7d7782fe..0f27578a7039 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -216,29 +216,45 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + /* * Kernel-side NEON support functions */ -void kernel_neon_begin(void) +void kernel_neon_begin_partial(u32 num_regs) { - /* Avoid using the NEON in interrupt context */ - BUG_ON(in_interrupt()); - preempt_disable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - /* - * Save the userland FPSIMD state if we have one and if we haven't done - * so already. Clear fpsimd_last_state to indicate that there is no - * longer userland FPSIMD state in the registers. - */ - if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_save_state(¤t->thread.fpsimd_state); - this_cpu_write(fpsimd_last_state, NULL); + BUG_ON(num_regs > 32); + fpsimd_save_partial_state(s, roundup(num_regs, 2)); + } else { + /* + * Save the userland FPSIMD state if we have one and if we + * haven't done so already. Clear fpsimd_last_state to indicate + * that there is no longer userland FPSIMD state in the + * registers. + */ + preempt_disable(); + if (current->mm && + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } } -EXPORT_SYMBOL(kernel_neon_begin); +EXPORT_SYMBOL(kernel_neon_begin_partial); void kernel_neon_end(void) { - preempt_enable(); + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + fpsimd_load_partial_state(s); + } else { + preempt_enable(); + } } EXPORT_SYMBOL(kernel_neon_end); From 29c0c76ffbf9208b6348cef882dd053a6190bb66 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 13:28:38 +0800 Subject: [PATCH 68/82] crypto: remove direct blkcipher_walk dependency on transform In order to allow other uses of the blkcipher walk API than the blkcipher algos themselves, this patch copies some of the transform data members to the walk struct so the transform is only accessed at walk init time. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blkcipher.c | 67 ++++++++++++++++++++--------------------- include/crypto/algapi.h | 5 ++- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a79e7e9ab86e..46fdab5e9cc7 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -70,14 +70,12 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm, - struct blkcipher_walk *walk, +static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk, unsigned int bsize) { u8 *addr; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = blkcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, 1); return bsize; @@ -105,7 +103,6 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err) { - struct crypto_blkcipher *tfm = desc->tfm; unsigned int nbytes = 0; if (likely(err >= 0)) { @@ -117,7 +114,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc, err = -EINVAL; goto err; } else - n = blkcipher_done_slow(tfm, walk, n); + n = blkcipher_done_slow(walk, n); nbytes = walk->total - n; err = 0; @@ -136,7 +133,7 @@ err: } if (walk->iv != desc->info) - memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm)); + memcpy(desc->info, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) @@ -226,22 +223,20 @@ static inline int blkcipher_next_fast(struct blkcipher_desc *desc, static int blkcipher_walk_next(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); unsigned int bsize; unsigned int n; int err; n = walk->total; - if (unlikely(n < crypto_blkcipher_blocksize(tfm))) { + if (unlikely(n < walk->cipher_blocksize)) { desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; return blkcipher_walk_done(desc, walk, -EINVAL); } walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); - if (!scatterwalk_aligned(&walk->in, alignmask) || - !scatterwalk_aligned(&walk->out, alignmask)) { + if (!scatterwalk_aligned(&walk->in, walk->alignmask) || + !scatterwalk_aligned(&walk->out, walk->alignmask)) { walk->flags |= BLKCIPHER_WALK_COPY; if (!walk->page) { walk->page = (void *)__get_free_page(GFP_ATOMIC); @@ -250,12 +245,12 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->blocksize, n); + bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); if (unlikely(n < bsize)) { - err = blkcipher_next_slow(desc, walk, bsize, alignmask); + err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask); goto set_phys_lowmem; } @@ -277,28 +272,26 @@ set_phys_lowmem: return err; } -static inline int blkcipher_copy_iv(struct blkcipher_walk *walk, - struct crypto_blkcipher *tfm, - unsigned int alignmask) +static inline int blkcipher_copy_iv(struct blkcipher_walk *walk) { - unsigned bs = walk->blocksize; - unsigned int ivsize = crypto_blkcipher_ivsize(tfm); - unsigned aligned_bs = ALIGN(bs, alignmask + 1); - unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) - - (alignmask + 1); + unsigned bs = walk->walk_blocksize; + unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1); + unsigned int size = aligned_bs * 2 + + walk->ivsize + max(aligned_bs, walk->ivsize) - + (walk->alignmask + 1); u8 *iv; - size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1); walk->buffer = kmalloc(size, GFP_ATOMIC); if (!walk->buffer) return -ENOMEM; - iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); iv = blkcipher_get_spot(iv, bs) + aligned_bs; iv = blkcipher_get_spot(iv, bs) + aligned_bs; - iv = blkcipher_get_spot(iv, ivsize); + iv = blkcipher_get_spot(iv, walk->ivsize); - walk->iv = memcpy(iv, walk->iv, ivsize); + walk->iv = memcpy(iv, walk->iv, walk->ivsize); return 0; } @@ -306,7 +299,10 @@ int blkcipher_walk_virt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt); @@ -315,7 +311,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags |= BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_phys); @@ -323,9 +322,6 @@ EXPORT_SYMBOL_GPL(blkcipher_walk_phys); static int blkcipher_walk_first(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - if (WARN_ON_ONCE(in_irq())) return -EDEADLK; @@ -335,8 +331,8 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc, walk->buffer = NULL; walk->iv = desc->info; - if (unlikely(((unsigned long)walk->iv & alignmask))) { - int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { + int err = blkcipher_copy_iv(walk); if (err) return err; } @@ -353,7 +349,10 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, unsigned int blocksize) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = blocksize; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 418d270e1806..6c4d916d739d 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -100,9 +100,12 @@ struct blkcipher_walk { void *page; u8 *buffer; u8 *iv; + unsigned int ivsize; int flags; - unsigned int blocksize; + unsigned int walk_blocksize; + unsigned int cipher_blocksize; + unsigned int alignmask; }; struct ablkcipher_walk { From 15aaa954da86024fa1b4e1c26b162fb7be84d2df Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 13:28:39 +0800 Subject: [PATCH 69/82] crypto: allow blkcipher walks over AEAD data This adds the function blkcipher_aead_walk_virt_block, which allows the caller to use the blkcipher walk API to handle the input and output scatterlists. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blkcipher.c | 14 ++++++++++++++ include/crypto/algapi.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 46fdab5e9cc7..0122bec38564 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -357,6 +357,20 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, } EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_aead_blocksize(tfm); + walk->ivsize = crypto_aead_ivsize(tfm); + walk->alignmask = crypto_aead_alignmask(tfm); + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block); + static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 6c4d916d739d..063f8ef49301 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -195,6 +195,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, int blkcipher_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, unsigned int blocksize); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize); int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err); From 84d391c5bf867c5e0a615cfa277eb8eb5b6d6453 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 01:10:04 +0000 Subject: [PATCH 70/82] arm64: enable generic CPU feature modalias matching for this architecture This enables support for the generic CPU feature modalias implementation that wires up optional CPU features to udev based module autoprobing. A file is provided that maps CPU feature numbers to elf_hwcap bits, which is the standard way on arm64 to advertise optional CPU features both internally and to user space. Signed-off-by: Ard Biesheuvel [catalin.marinas@arm.com: removed unnecessary "!!"] Signed-off-by: Catalin Marinas Conflicts: arch/arm64/Kconfig Change-Id: Ief16b3197cd0564d8cf8aa82e9614bcda6399fe5 --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/cpufeature.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 arch/arm64/include/asm/cpufeature.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8832ac02ffef..3ee27d34a020 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,8 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CPU_AUTOPROBE select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h new file mode 100644 index 000000000000..cd4ac0516488 --- /dev/null +++ b/arch/arm64/include/asm/cpufeature.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include + +/* + * In the arm64 world (as in the ARM world), elf_hwcap is used both internally + * in the kernel and for user space to keep track of which optional features + * are supported by the current system. So let's map feature 'x' to HWCAP_x. + * Note that HWCAP_x constants are bit fields so we need to take the log. + */ + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif From 820140883b83b548cd09151732ae783abc2976a0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Sep 2013 09:28:03 +0200 Subject: [PATCH 71/82] arm64: pull in from asm-generic Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..fb6e0c4d46a3 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -36,6 +36,7 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h From bf485e6f51d505bbe4ab5eaafcfe7789ec83e7ee Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Thu, 28 Aug 2014 14:00:10 -0700 Subject: [PATCH 72/82] arm64: check for upper PAGE_SHIFT bits in pfn_valid() pfn_valid() returns a false positive when the lower (64 - PAGE_SHIFT) bits match a valid pfn but some of the upper bits are set. This caused a kernel panic in kpageflags_read() when a userspace utility parsed /proc/*/pagemap, neglected to discard the upper flag bits, and tried to lseek()+read() from the corresponding offset in /proc/kpageflags. A valid pfn will never have the upper PAGE_SHIFT bits set, so simply check for this before passing the pfn to memblock_is_memory(). Change-Id: Ief5d8cd4dd93cbecd545a634a8d5885865cb5970 Signed-off-by: Greg Hackmann --- arch/arm64/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..61599b516c66 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -109,9 +109,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) } #ifdef CONFIG_HAVE_ARCH_PFN_VALID +#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1) + int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif From 02273a4cd3ebada77f088ae13229813ca918af44 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 3 Feb 2014 19:18:29 +0100 Subject: [PATCH 73/82] ARM64: perf: support dwarf unwinding in compat mode Add support for unwinding using the dwarf information in compat mode. Using the correct user stack pointer allows perf to record the frames correctly in the native and compat modes. Note that although the dwarf frame unwinding works ok using libunwind in native mode (on ARMv7 & ARMv8), some changes are required to the libunwind code for the compat mode. Those changes are posted separately on the libunwind mailing list. Tested on ARMv8 platform with v8 and compat v7 binaries, the latter are statically built. Signed-off-by: Jean Pihet Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compat.h | 2 +- arch/arm64/include/asm/ptrace.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af807ef0f..7058eec269ab 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -214,7 +214,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (current_pt_regs()->compat_sp) +#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..c2a199727e4f 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -131,7 +131,7 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - ((regs)->sp) + (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) /* * Are the current registers suitable for user mode? (used to maintain From 8db0b1851e5e636d61c9d0d15fc4f5de4af0857c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 10 Jul 2014 11:37:40 +0100 Subject: [PATCH 74/82] arm64: Cast KSTK_(EIP|ESP) to unsigned long This is for similarity with thread_saved_(pc|sp) and to avoid some compiler warnings in the audit code. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ab239b2c456f..db3112886968 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -131,8 +131,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) -#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc -#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp +#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) +#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) /* * Prefetching support From dc52724b7222f368c49b578787b2fd876c796fc7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 Aug 2014 16:11:10 +0100 Subject: [PATCH 75/82] arm64: report correct stack pointer in KSTK_ESP for compat tasks The KSTK_ESP macro is used to determine the user stack pointer for a given task. In particular, this is used to to report the '[stack]' VMA in /proc/self/maps, which is used by Android to determine the stack location for children of the main thread. This patch fixes the macro to use user_stack_pointer instead of directly returning sp. This means that we report w13 instead of sp, since the former is used as the stack pointer when executing in AArch32 state. Cc: Reported-by: Serban Constantinescu Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index db3112886968..3b7bb031f98f 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -132,7 +132,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) #define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) -#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) +#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk)) /* * Prefetching support From 597ff3a0f3ba6e471ce4dc93bf0b41abcebe9f8c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 29 Aug 2014 16:08:02 +0100 Subject: [PATCH 76/82] arm64: Add brackets around user_stack_pointer() Commit 5f888a1d33 (ARM64: perf: support dwarf unwinding in compat mode) changes user_stack_pointer() to return the compat SP for 32-bit tasks but without brackets around the whole definition, with possible issues on the call sites (noticed with a subsequent fix for KSTK_ESP). Fixes: 5f888a1d33c4 (ARM64: perf: support dwarf unwinding in compat mode) Reported-by: Sudeep Holla Cc: Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index c2a199727e4f..7304fa2fd9fa 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -131,7 +131,7 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) + (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) /* * Are the current registers suitable for user mode? (used to maintain From c7ca25a4ad7498bba8c20ede3c5f4a3b7931887a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 1 Sep 2014 11:09:51 +0800 Subject: [PATCH 77/82] arm64: fix bug for reloading FPSIMD state after cpu power off Now arm64 defers reloading FPSIMD state, but this optimization also introduces the bug after cpu resume back from low power mode. The reason is after the cpu has been powered off, s/w need set the cpu's fpsimd_last_state to NULL so that it will force to reload FPSIMD state for the thread, otherwise there has the chance to meet the condition for both the task's fpsimd_state.cpu field contains the id of the current cpu, and the cpu's fpsimd_last_state per-cpu variable points to the task's fpsimd_state, so finally kernel will skip to reload the context during it return back to userland. Acked-by: Ard Biesheuvel Reviewed-by: Catalin Marinas Signed-off-by: Leo Yan Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0f27578a7039..093bee27e527 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -268,6 +268,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, case CPU_PM_ENTER: if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); break; case CPU_PM_EXIT: if (current->mm) From f50be83d236b9b0169b85c47b8878f61bb22f448 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 May 2013 03:10:35 +0100 Subject: [PATCH 78/82] apparmor: no need to delay vfree() vfree() can be called from interrupt contexts now Signed-off-by: Al Viro Acked-by: John Johansen Signed-off-by: James Morris --- security/apparmor/lib.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 7430298116d6..5b62af7254ca 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -104,19 +104,6 @@ void *kvmalloc(size_t size) return buffer; } -/** - * do_vfree - workqueue routine for freeing vmalloced memory - * @work: data to be freed - * - * The work_struct is overlaid to the data being freed, as at the point - * the work is scheduled the data is no longer valid, be its freeing - * needs to be delayed until safe. - */ -static void do_vfree(struct work_struct *work) -{ - vfree(work); -} - /** * kvfree - free an allocation do by kvmalloc * @buffer: buffer to free (MAYBE_NULL) @@ -125,13 +112,8 @@ static void do_vfree(struct work_struct *work) */ void kvfree(void *buffer) { - if (is_vmalloc_addr(buffer)) { - /* Data is no longer valid so just use the allocated space - * as the work_struct - */ - struct work_struct *work = (struct work_struct *) buffer; - INIT_WORK(work, do_vfree); - schedule_work(work); - } else + if (is_vmalloc_addr(buffer)) + vfree(buffer); + else kfree(buffer); } From 40ef955d6541361f2e22044a2362ee13f0fde89b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 6 May 2014 14:02:53 -0400 Subject: [PATCH 79/82] nick kvfree() from apparmor too many places open-code it Signed-off-by: Al Viro Conflicts: mm/util.c security/apparmor/include/apparmor.h Change-Id: Ie8602e0199282dc462921cb7217158d1998853b0 --- include/linux/mm.h | 2 ++ mm/util.c | 10 ++++++++++ security/apparmor/include/apparmor.h | 1 - security/apparmor/lib.c | 14 -------------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 01eb01df9225..ff7f6375f33f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -324,6 +324,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void kvfree(const void *addr); + static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/util.c b/mm/util.c index ab1424dbe2e6..36aa4815e3dd 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "internal.h" @@ -384,6 +385,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, } EXPORT_SYMBOL(vm_mmap); +void kvfree(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} +EXPORT_SYMBOL(kvfree); + struct address_space *page_mapping(struct page *page) { struct address_space *mapping = page->mapping; diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 40aedd9f73ea..4a8cbfeef8b3 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -65,7 +65,6 @@ extern int apparmor_initialized __initdata; char *aa_split_fqname(char *args, char **ns_name); void aa_info_message(const char *str); void *kvmalloc(size_t size); -void kvfree(void *buffer); /** diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 5b62af7254ca..ce8d9a84ab2d 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -103,17 +103,3 @@ void *kvmalloc(size_t size) } return buffer; } - -/** - * kvfree - free an allocation do by kvmalloc - * @buffer: buffer to free (MAYBE_NULL) - * - * Free a buffer allocated by kvmalloc - */ -void kvfree(void *buffer) -{ - if (is_vmalloc_addr(buffer)) - vfree(buffer); - else - kfree(buffer); -} From 8b9ca310b11458aeeb14861e34f896d67c1333ff Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Jul 2014 15:22:37 -0700 Subject: [PATCH 80/82] fs/seq_file: fallback to vmalloc allocation There are a couple of seq_files which use the single_open() interface. This interface requires that the whole output must fit into a single buffer. E.g. for /proc/stat allocation failures have been observed because an order-4 memory allocation failed due to memory fragmentation. In such situations reading /proc/stat is not possible anymore. Therefore change the seq_file code to fallback to vmalloc allocations which will usually result in a couple of order-0 allocations and hence also work if memory is fragmented. For reference a call trace where reading from /proc/stat failed: sadc: page allocation failure: order:4, mode:0x1040d0 CPU: 1 PID: 192063 Comm: sadc Not tainted 3.10.0-123.el7.s390x #1 [...] Call Trace: show_stack+0x6c/0xe8 warn_alloc_failed+0xd6/0x138 __alloc_pages_nodemask+0x9da/0xb68 __get_free_pages+0x2e/0x58 kmalloc_order_trace+0x44/0xc0 stat_open+0x5a/0xd8 proc_reg_open+0x8a/0x140 do_dentry_open+0x1bc/0x2c8 finish_open+0x46/0x60 do_last+0x382/0x10d0 path_openat+0xc8/0x4f8 do_filp_open+0x46/0xa8 do_sys_open+0x114/0x1f0 sysc_tracego+0x14/0x1a Signed-off-by: Heiko Carstens Tested-by: David Rientjes Cc: Ian Kent Cc: Hendrik Brueckner Cc: Thorsten Diehl Cc: Andrea Righi Cc: Christoph Hellwig Cc: Al Viro Cc: Stefan Bader Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Conflicts: fs/seq_file.c Change-Id: I009080dd017b020ffd5e812e5b472bdb8349217a --- fs/seq_file.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 774c1eb7f1c9..749b84104014 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m) m->count = m->size; } +static void *seq_buf_alloc(unsigned long size) +{ + void *buf; + + buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!buf && size > PAGE_SIZE) + buf = vmalloc(size); + return buf; +} + /** * seq_open - initialize sequential file * @file: file we initialize @@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset) return 0; } if (!m->buf) { - m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) return -ENOMEM; } @@ -135,8 +147,8 @@ static int traverse(struct seq_file *m, loff_t offset) Eoverflow: m->op->stop(m, p); - kfree(m->buf); - m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + kvfree(m->buf); + m->buf = seq_buf_alloc(m->size <<= 1); return !m->buf ? -ENOMEM : -EAGAIN; } @@ -191,7 +203,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) /* grab buffer if we didn't have one */ if (!m->buf) { - m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + m->buf = seq_buf_alloc(m->size = PAGE_SIZE); if (!m->buf) goto Enomem; } @@ -231,8 +243,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) if (m->count < m->size) goto Fill; m->op->stop(m, p); - kfree(m->buf); - m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + kvfree(m->buf); + m->buf = seq_buf_alloc(m->size <<= 1); if (!m->buf) goto Enomem; m->count = 0; @@ -347,7 +359,7 @@ EXPORT_SYMBOL(seq_lseek); int seq_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; - kfree(m->buf); + kvfree(m->buf); kfree(m); return 0; } @@ -602,13 +614,13 @@ EXPORT_SYMBOL(single_open); int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), void *data, size_t size) { - char *buf = kmalloc(size, GFP_KERNEL); + char *buf = seq_buf_alloc(size); int ret; if (!buf) return -ENOMEM; ret = single_open(file, show, data); if (ret) { - kfree(buf); + kvfree(buf); return ret; } ((struct seq_file *)file->private_data)->buf = buf; From 2d8da662dc9dc79d52847beddf0ae1a6e559bbca Mon Sep 17 00:00:00 2001 From: JP Abgrall Date: Wed, 3 Sep 2014 17:36:44 -0700 Subject: [PATCH 81/82] android: base-cfg: enforce the needed XFRM_MODE_TUNNEL (for VPN) Change-Id: I587023d56877d32806079676790751155c768982 Signed-off-by: JP Abgrall --- android/configs/android-base.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 5b888487ede1..bdc4749a195f 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -24,6 +24,7 @@ CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_INET=y CONFIG_INET_ESP=y +CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MANGLE=y From 42df511db0372b4878b322d812b975e4632a2928 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Fri, 5 Sep 2014 18:27:38 -0700 Subject: [PATCH 82/82] cpufreq: interactive: make common_tunables static From: Cylen Yao common_tunables should be static. Change-Id: I502ee3062bece5082fea7861eff2f6237e25cede Signed-off-by: Todd Poynor --- drivers/cpufreq/cpufreq_interactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 437aaed57057..72563be3537c 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -117,7 +117,7 @@ struct cpufreq_interactive_tunables { }; /* For cases where we have single governor instance for system */ -struct cpufreq_interactive_tunables *common_tunables; +static struct cpufreq_interactive_tunables *common_tunables; static struct attribute_group *get_sysfs_attr(void);