From 142953a07f68a464a626ad9ca2ae10d3f9ec2cbd Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 22 Sep 2020 14:59:41 -0700 Subject: [PATCH] ANDROID: Incremental fs: Add per UID read timeouts Bug: 169056129 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I8cad9ee4095123bafba33abb65bbb339ba6ff8b5 --- fs/incfs/data_mgmt.c | 38 ++++- fs/incfs/data_mgmt.h | 10 +- fs/incfs/pseudo_files.c | 94 +++++++++++ fs/incfs/vfs.c | 42 ++++- include/uapi/linux/incrementalfs.h | 96 ++++++++++- .../selftests/filesystems/incfs/incfs_test.c | 156 ++++++++++++++++++ 6 files changed, 417 insertions(+), 19 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 4100e900b09b..14cba02b8189 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -3,6 +3,7 @@ * Copyright 2019 Google LLC */ #include +#include #include #include #include @@ -49,6 +50,7 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, spin_lock_init(&mi->mi_log.rl_lock); spin_lock_init(&mi->pending_read_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); + spin_lock_init(&mi->mi_per_uid_read_timeouts_lock); error = incfs_realloc_mount_info(mi, options); if (error) @@ -117,6 +119,7 @@ void incfs_free_mount_info(struct mount_info *mi) kfree(mi->mi_log.rl_ring_buf); kfree(mi->log_xattr); kfree(mi->pending_read_xattr); + kfree(mi->mi_per_uid_read_timeouts); kfree(mi); } @@ -933,7 +936,8 @@ static void notify_pending_reads(struct mount_info *mi, } static int wait_for_data_block(struct data_file *df, int block_index, - int timeout_ms, + int min_time_ms, int min_pending_time_ms, + int max_pending_time_ms, struct data_file_block *res_block) { struct data_file_block block = {}; @@ -942,6 +946,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, struct mount_info *mi = NULL; int error = 0; int wait_res = 0; + u64 time; if (!df || !res_block) return -EFAULT; @@ -969,11 +974,13 @@ static int wait_for_data_block(struct data_file *df, int block_index, /* If the block was found, just return it. No need to wait. */ if (is_data_block_present(&block)) { + if (min_time_ms) + error = msleep_interruptible(min_time_ms); *res_block = block; - return 0; + return error; } else { /* If it's not found, create a pending read */ - if (timeout_ms != 0) { + if (max_pending_time_ms != 0) { read = add_pending_read(df, block_index); if (!read) return -ENOMEM; @@ -983,11 +990,14 @@ static int wait_for_data_block(struct data_file *df, int block_index, } } + if (min_pending_time_ms) + time = ktime_get_ns(); + /* Wait for notifications about block's arrival */ wait_res = wait_event_interruptible_timeout(segment->new_data_arrival_wq, - (is_read_done(read)), - msecs_to_jiffies(timeout_ms)); + (is_read_done(read)), + msecs_to_jiffies(max_pending_time_ms)); /* Woke up, the pending read is no longer needed. */ remove_pending_read(df, read); @@ -1005,6 +1015,16 @@ static int wait_for_data_block(struct data_file *df, int block_index, return wait_res; } + if (min_pending_time_ms) { + time = div_u64(ktime_get_ns() - time, 1000000); + if (min_pending_time_ms > time) { + error = msleep_interruptible( + min_pending_time_ms - time); + if (error) + return error; + } + } + error = down_read_killable(&segment->rwsem); if (error) return error; @@ -1032,8 +1052,9 @@ static int wait_for_data_block(struct data_file *df, int block_index, } ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp) + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp) { loff_t pos; ssize_t result; @@ -1052,7 +1073,8 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, mi = df->df_mount_info; bf = df->df_backing_file_context->bc_file; - result = wait_for_data_block(df, index, timeout_ms, &block); + result = wait_for_data_block(df, index, min_time_ms, + min_pending_time_ms, max_pending_time_ms, &block); if (result < 0) goto out; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 42922b2f9db1..13b663a1d53e 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -163,6 +163,11 @@ struct mount_info { /* Number of blocks written since mount */ atomic_t mi_blocks_written; + + /* Per UID read timeouts */ + spinlock_t mi_per_uid_read_timeouts_lock; + struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; + int mi_per_uid_read_timeouts_size; }; struct data_file_block { @@ -327,8 +332,9 @@ struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); void incfs_free_dir_file(struct dir_file *dir); ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp); + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp); int incfs_get_filled_blocks(struct data_file *df, struct incfs_file_data *fd, diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 07a4eb3d16f1..7c20d452c031 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -940,6 +940,96 @@ out: return error; } +static long ioctl_get_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_get_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_get_read_timeouts_args args = {}; + int error = 0; + struct incfs_per_uid_read_timeouts *buffer; + int size; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + if (args.timeouts_array_size_out > INCFS_DATA_FILE_BLOCK_SIZE) + return -EINVAL; + + buffer = kzalloc(args.timeouts_array_size_out, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + size = mi->mi_per_uid_read_timeouts_size; + if (args.timeouts_array_size < size) + error = -E2BIG; + else if (size) + memcpy(buffer, mi->mi_per_uid_read_timeouts, size); + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + + args.timeouts_array_size_out = size; + if (!error && size) + if (copy_to_user(u64_to_user_ptr(args.timeouts_array), buffer, + size)) + error = -EFAULT; + + if (!error || error == -E2BIG) + if (copy_to_user(args_usr_ptr, &args, sizeof(args)) > 0) + error = -EFAULT; + + kfree(buffer); + return error; +} + +static long ioctl_set_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_set_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_set_read_timeouts_args args = {}; + int error = 0; + int size; + struct incfs_per_uid_read_timeouts *buffer = NULL, *tmp; + int i; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + size = args.timeouts_array_size; + if (size) { + if (size > INCFS_DATA_FILE_BLOCK_SIZE || + size % sizeof(*buffer) != 0) + return -EINVAL; + + buffer = kzalloc(size, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + if (copy_from_user(buffer, u64_to_user_ptr(args.timeouts_array), + size)) { + error = -EINVAL; + goto out; + } + + for (i = 0; i < size / sizeof(*buffer); ++i) { + struct incfs_per_uid_read_timeouts *t = &buffer[i]; + + if (t->min_pending_time_ms > t->max_pending_time_ms) { + error = -EINVAL; + goto out; + } + } + } + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + mi->mi_per_uid_read_timeouts_size = size; + tmp = mi->mi_per_uid_read_timeouts; + mi->mi_per_uid_read_timeouts = buffer; + buffer = tmp; + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + +out: + kfree(buffer); + return error; +} + static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { @@ -952,6 +1042,10 @@ static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_CREATE_MAPPED_FILE: return ioctl_create_mapped_file(mi, (void __user *)arg); + case INCFS_IOC_GET_READ_TIMEOUTS: + return ioctl_get_read_timeouts(mi, (void __user *)arg); + case INCFS_IOC_SET_READ_TIMEOUTS: + return ioctl_set_read_timeouts(mi, (void __user *)arg); default: return -EINVAL; } diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index eddc0501270e..c0a5181cd3ef 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -416,6 +416,39 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return index_dentry; } +static int read_single_page_timeouts(struct data_file *df, struct file *f, + int block_index, struct mem_range range, + struct mem_range tmp) +{ + struct mount_info *mi = df->df_mount_info; + u32 min_time_ms = 0; + u32 min_pending_time_ms = 0; + u32 max_pending_time_ms = U32_MAX; + int uid = current_uid().val; + int i; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + for (i = 0; i < mi->mi_per_uid_read_timeouts_size / + sizeof(*mi->mi_per_uid_read_timeouts); ++i) { + struct incfs_per_uid_read_timeouts *t = + &mi->mi_per_uid_read_timeouts[i]; + + if(t->uid == uid) { + min_time_ms = t->min_time_ms; + min_pending_time_ms = t->min_pending_time_ms; + max_pending_time_ms = t->max_pending_time_ms; + break; + } + } + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + if (max_pending_time_ms == U32_MAX) + max_pending_time_ms = mi->mi_options.read_timeout_ms; + + return incfs_read_data_file_block(range, f, block_index, + min_time_ms, min_pending_time_ms, max_pending_time_ms, + tmp); +} + static int read_single_page(struct file *f, struct page *page) { loff_t offset = 0; @@ -426,7 +459,6 @@ static int read_single_page(struct file *f, struct page *page) int result = 0; void *page_start; int block_index; - int timeout_ms; if (!df) { SetPageError(page); @@ -439,22 +471,20 @@ static int read_single_page(struct file *f, struct page *page) block_index = (offset + df->df_mapped_offset) / INCFS_DATA_FILE_BLOCK_SIZE; size = df->df_size; - timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE }; - tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); if (!tmp.data) { read_result = -ENOMEM; goto err; } bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); - read_result = incfs_read_data_file_block( - range(page_start, bytes_to_read), f, block_index, - timeout_ms, tmp); + + read_result = read_single_page_timeouts(df, f, block_index, + range(page_start, bytes_to_read), tmp); free_pages((unsigned long)tmp.data, get_order(tmp.len)); } else { diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 374d6e4f4650..0cd6b3403222 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -43,7 +43,10 @@ /* ===== ioctl requests on the command dir ===== */ -/* Create a new file */ +/* + * Create a new file + * May only be called on .pending_reads file + */ #define INCFS_IOC_CREATE_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) @@ -93,14 +96,34 @@ #define INCFS_IOC_GET_FILLED_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) -/* Creates a new mapped file */ +/* + * Creates a new mapped file + * May only be called on .pending_reads file + */ #define INCFS_IOC_CREATE_MAPPED_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) -/* Get number of blocks, total and filled */ +/* + * Get number of blocks, total and filled + * May only be called on .pending_reads file + */ #define INCFS_IOC_GET_BLOCK_COUNT \ _IOR(INCFS_IOCTL_BASE_CODE, 36, struct incfs_get_block_count_args) +/* + * Get per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_GET_READ_TIMEOUTS \ + _IOR(INCFS_IOCTL_BASE_CODE, 37, struct incfs_get_read_timeouts_args) + +/* + * Set per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_SET_READ_TIMEOUTS \ + _IOW(INCFS_IOCTL_BASE_CODE, 38, struct incfs_set_read_timeouts_args) + /* ===== sysfs feature flags ===== */ /* * Each flag is represented by a file in /sys/fs/incremental-fs/features @@ -431,6 +454,10 @@ struct incfs_create_mapped_file_args { __aligned_u64 source_offset; }; +/* + * Get information about the blocks in this file + * Argument for INCFS_IOC_GET_BLOCK_COUNT + */ struct incfs_get_block_count_args { /* Total number of data blocks in the file */ __u32 total_data_blocks_out; @@ -445,4 +472,67 @@ struct incfs_get_block_count_args { __u32 filled_hash_blocks_out; }; +/* Description of timeouts for one UID */ +struct incfs_per_uid_read_timeouts { + /* UID to apply these timeouts to */ + __u32 uid; + + /* + * Min time to read any block. Note that this doesn't apply to reads + * which are satisfied from the page cache. + */ + __u32 min_time_ms; + + /* + * Min time to satisfy a pending read. Must be >= min_time_ms. Any + * pending read which is filled before this time will be delayed so + * that the total read time >= this value. + */ + __u32 min_pending_time_ms; + + /* + * Max time to satisfy a pending read before the read times out. + * If set to U32_MAX, defaults to mount options read_timeout_ms= + * Must be >= min_pending_time_ms + */ + __u32 max_pending_time_ms; +}; + +/* + * Get the read timeouts array + * Argument for INCFS_IOC_GET_READ_TIMEOUTS + */ +struct incfs_get_read_timeouts_args { + /* + * A pointer to a buffer to fill with the current timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above buffer in bytes */ + __u32 timeouts_array_size; + + /* Size used in bytes, or size needed if -ENOMEM returned */ + __u32 timeouts_array_size_out; +}; + +/* + * Set the read timeouts array + * Arguments for INCFS_IOC_SET_READ_TIMEOUTS + */ +struct incfs_set_read_timeouts_args { + /* + * A pointer to an array containing the new timeouts + * This will replace any existing timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above array in bytes. Must be < 256 */ + __u32 timeouts_array_size; +}; + + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 38acad30c10c..881623871a8e 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -3379,6 +3380,160 @@ out: return result; } +static int is_close(struct timespec *start, int expected_ms) +{ + const int allowed_variance = 100; + int result = TEST_FAILURE; + struct timespec finish; + int diff; + + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &finish), 0); + diff = (finish.tv_sec - start->tv_sec) * 1000 + + (finish.tv_nsec - start->tv_nsec) / 1000000; + + TESTCOND(diff >= expected_ms - allowed_variance); + TESTCOND(diff <= expected_ms + allowed_variance); + result = TEST_SUCCESS; +out: + return result; +} + +static int per_uid_read_timeouts_test(const char *mount_dir) +{ + struct test_file file = { + .name = "file", + .size = 16 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + int result = TEST_FAILURE; + char *backing_dir = NULL; + int pid; + int cmd_fd = -1; + char *filename = NULL; + int fd = -1; + struct timespec start; + char buffer[4096]; + struct incfs_per_uid_read_timeouts purt_get[1]; + struct incfs_get_read_timeouts_args grt = { + ptr_to_u64(purt_get), + sizeof(purt_get) + }; + struct incfs_per_uid_read_timeouts purt_set[] = { + { + .uid = 0, + .min_time_ms = 1000, + .min_pending_time_ms = 2000, + .max_pending_time_ms = 3000, + }, + }; + struct incfs_set_read_timeouts_args srt = { + ptr_to_u64(purt_set), + sizeof(purt_set) + }; + int status; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "read_timeout_ms=1000,readahead=0", false), 0); + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + TESTEQUAL(emit_file(cmd_fd, NULL, file.name, &file.id, file.size, + NULL), 0); + + TEST(filename = concat_file_name(mount_dir, file.name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC), 0); + + /* Default mount options read failure is 1000 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + /* Set it to 3000 */ + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 3000), 0); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), -1); + TESTEQUAL(errno, E2BIG); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + grt.timeouts_array_size = sizeof(purt_get); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + TESTEQUAL(purt_get[0].uid, purt_set[0].uid); + TESTEQUAL(purt_get[0].min_time_ms, purt_set[0].min_time_ms); + TESTEQUAL(purt_get[0].min_pending_time_ms, + purt_set[0].min_pending_time_ms); + TESTEQUAL(purt_get[0].max_pending_time_ms, + purt_set[0].max_pending_time_ms); + + /* Still 1000 in UID 2 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + exit(0); + } + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Set it to default */ + purt_set[0].max_pending_time_ms = UINT32_MAX; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min read time */ + TESTEQUAL(emit_test_block(mount_dir, &file, 0), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), sizeof(buffer)); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min pending time */ + purt_set[0].uid = 2; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), sizeof(buffer)), + sizeof(buffer)); + exit(0); + } + sleep(1); + TESTEQUAL(emit_test_block(mount_dir, &file, 1), 0); + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 2000), 0); + + /* Clear timeouts */ + srt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + result = TEST_SUCCESS; +out: + close(fd); + + if (pid == 0) + exit(result); + + free(filename); + close(cmd_fd); + umount(backing_dir); + free(backing_dir); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -3492,6 +3647,7 @@ int main(int argc, char *argv[]) MAKE_TEST(compatibility_test), MAKE_TEST(data_block_count_test), MAKE_TEST(hash_block_count_test), + MAKE_TEST(per_uid_read_timeouts_test), }; #undef MAKE_TEST