From 3658bbe3b252f125fb9ba880d7ac072ff68389c7 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 20 Feb 2024 19:57:59 +0800 Subject: [PATCH 1/7] iomap: add pos and dirty_len into trace_iomap_writepage_map mainline inclusion from mainline-v6.9-rc1 commit 54943abce0927156ba9909f1a533b25410bfe2ca category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=54943abce0927156ba9909f1a533b25410bfe2ca -------------------------------- Since commit fd07e0aa23c4 ("iomap: map multiple blocks at a time"), we could map multi-blocks once a time, and the dirty_len indicates the expected map length, map_len won't large than it. The pos and dirty_len means the dirty range that should be mapped to write, add them into trace_iomap_writepage_map() could be more useful for debug. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240220115759.3445025-1-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit 54943abce0927156ba9909f1a533b25410bfe2ca) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 2 +- fs/iomap/trace.h | 43 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 30ef5a73bb656..b9b060fea67a8 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1819,7 +1819,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len); if (error) break; - trace_iomap_writepage_map(inode, &wpc->iomap); + trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap); map_len = min_t(u64, dirty_len, wpc->iomap.offset + wpc->iomap.length - pos); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index c16fd55f5595c..3ef694f9489f0 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \ TP_ARGS(inode, iomap)) DEFINE_IOMAP_EVENT(iomap_iter_dstmap); DEFINE_IOMAP_EVENT(iomap_iter_srcmap); -DEFINE_IOMAP_EVENT(iomap_writepage_map); + +TRACE_EVENT(iomap_writepage_map, + TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len, + struct iomap *iomap), + TP_ARGS(inode, pos, dirty_len, iomap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, ino) + __field(u64, pos) + __field(u64, dirty_len) + __field(u64, addr) + __field(loff_t, offset) + __field(u64, length) + __field(u16, type) + __field(u16, flags) + __field(dev_t, bdev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->dirty_len = dirty_len; + __entry->addr = iomap->addr; + __entry->offset = iomap->offset; + __entry->length = iomap->length; + __entry->type = iomap->type; + __entry->flags = iomap->flags; + __entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0; + ), + TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx " + "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + MAJOR(__entry->bdev), MINOR(__entry->bdev), + __entry->pos, + __entry->dirty_len, + __entry->addr, + __entry->offset, + __entry->length, + __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS), + __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS)) +); TRACE_EVENT(iomap_iter, TP_PROTO(struct iomap_iter *iter, const void *ops, From 67254abb8a4ed9688e114a23617c14a008c517da Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:44 +0800 Subject: [PATCH 2/7] iomap: drop the write failure handles when unsharing and zeroing mainline inclusion from mainline-v6.10-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA -------------------------------- Unsharing and zeroing can only happen within EOF, so there is never a need to perform posteof pagecache truncation if write begin fails, also partial write could never theoretically happened from iomap_write_end(), so remove both of them. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-6-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit 89c6c1d91ab2dc05c6f4ad504b03169aa32694cc) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b9b060fea67a8..88c7403046165 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -860,7 +860,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, out_unlock: __iomap_put_folio(iter, pos, 0, folio); - iomap_write_failed(iter->inode, pos, len); return status; } @@ -937,8 +936,6 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (ret < len) - iomap_write_failed(iter->inode, pos + ret, len - ret); return ret; } @@ -986,8 +983,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) } status = iomap_write_begin(iter, pos, bytes, &folio); - if (unlikely(status)) + if (unlikely(status)) { + iomap_write_failed(iter->inode, pos, bytes); break; + } if (iter->iomap.flags & IOMAP_F_STALE) break; @@ -1001,6 +1000,9 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); status = iomap_write_end(iter, pos, bytes, copied, folio); + if (status < bytes) + iomap_write_failed(iter->inode, pos + status, + bytes - status); if (unlikely(copied != status)) iov_iter_revert(i, copied - status); From d11ca63728672ef330c51288174d48073cc594c2 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:45 +0800 Subject: [PATCH 3/7] iomap: don't increase i_size if it's not a write operation mainline inclusion from mainline-v6.10-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA -------------------------------- Increase i_size in iomap_zero_range() and iomap_unshare_iter() is not needed, the caller should handle it. Especially, when truncate partial block, we should not increase i_size beyond the new EOF here. It doesn't affect xfs and gfs2 now because they set the new file size after zero out, it doesn't matter that a transient increase in i_size, but it will affect ext4 because it set file size before truncate. So move the i_size updating logic to iomap_write_iter(). Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-7-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit 943bc0882cebf482422640924062a7daac5a27ba) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 88c7403046165..acb043c757919 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -911,32 +911,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - loff_t old_size = iter->inode->i_size; - size_t ret; - - if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, folio, pos, copied); - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); - } else { - ret = __iomap_write_end(iter->inode, pos, len, copied, folio); - } - - /* - * Update the in-memory inode size after copying the data into the page - * cache. It's up to the file system to write the updated size to disk, - * preferably after I/O completion so that no stale data is exposed. - */ - if (pos + ret > old_size) { - i_size_write(iter->inode, pos + ret); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, ret, folio); - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); - return ret; + if (srcmap->type == IOMAP_INLINE) + return iomap_write_end_inline(iter, folio, pos, copied); + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) + return block_write_end(NULL, iter->inode->i_mapping, pos, len, + copied, &folio->page, NULL); + return __iomap_write_end(iter->inode, pos, len, copied, folio); } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -951,6 +932,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; + loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -1000,6 +982,22 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); status = iomap_write_end(iter, pos, bytes, copied, folio); + /* + * Update the in-memory inode size after copying the data into + * the page cache. It's up to the file system to write the + * updated size to disk, preferably after I/O completion so that + * no stale data is exposed. Only once that's done can we + * unlock and release the folio. + */ + old_size = iter->inode->i_size; + if (pos + status > old_size) { + i_size_write(iter->inode, pos + status); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; + } + __iomap_put_folio(iter, pos, status, folio); + + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); if (status < bytes) iomap_write_failed(iter->inode, pos + status, bytes - status); @@ -1375,6 +1373,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(bytes == 0)) return -EIO; @@ -1443,6 +1442,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(bytes == 0)) return -EIO; From 077728a4d8a186e5269170e93952e3d2a127bf08 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:46 +0800 Subject: [PATCH 4/7] iomap: use a new variable to handle the written bytes in iomap_write_iter() mainline inclusion from mainline-v6.10-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA -------------------------------- In iomap_write_iter(), the status variable used to receive the return value from iomap_write_end() is confusing, replace it with a new written variable to represent the written bytes in each cycle, no logic changes. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-8-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit 1a61d74932d460f47ffaf08927dbe9d43315841f) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index acb043c757919..86f5b12e97883 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -924,7 +924,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) { loff_t length = iomap_length(iter); loff_t pos = iter->pos; - ssize_t written = 0; + ssize_t total_written = 0; long status = 0; struct address_space *mapping = iter->inode->i_mapping; size_t chunk = mapping_max_folio_size(mapping); @@ -936,6 +936,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + size_t written; /* Bytes have been written */ bytes = iov_iter_count(i); retry: @@ -980,7 +981,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - status = iomap_write_end(iter, pos, bytes, copied, folio); + written = iomap_write_end(iter, pos, bytes, copied, folio); /* * Update the in-memory inode size after copying the data into @@ -990,22 +991,22 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * unlock and release the folio. */ old_size = iter->inode->i_size; - if (pos + status > old_size) { - i_size_write(iter->inode, pos + status); + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - __iomap_put_folio(iter, pos, status, folio); + __iomap_put_folio(iter, pos, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (status < bytes) - iomap_write_failed(iter->inode, pos + status, - bytes - status); - if (unlikely(copied != status)) - iov_iter_revert(i, copied - status); + if (written < bytes) + iomap_write_failed(iter->inode, pos + written, + bytes - written); + if (unlikely(copied != written)) + iov_iter_revert(i, copied - written); cond_resched(); - if (unlikely(status == 0)) { + if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning @@ -1019,17 +1020,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) goto retry; } } else { - pos += status; - written += status; - length -= status; + pos += written; + total_written += written; + length -= written; } } while (iov_iter_count(i) && length); if (status == -EAGAIN) { - iov_iter_revert(i, written); + iov_iter_revert(i, total_written); return -EAGAIN; } - return written ? written : status; + return total_written ? total_written : status; } ssize_t From ad5c076b85a6018e9fb1030f65a215bf744dbb0c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:47 +0800 Subject: [PATCH 5/7] iomap: make iomap_write_end() return a boolean mainline inclusion from mainline-v6.10-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA -------------------------------- For now, we can make sure iomap_write_end() always return 0 or copied bytes, so instead of return written bytes, convert to return a boolean to indicate the copied bytes have been written to the pagecache. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-9-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit 815f4b633ba1c9c6a151f251616f23e2407fcf24) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 48 +++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 86f5b12e97883..a92ebb0f64b1b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -864,7 +864,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, return status; } -static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, +static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); @@ -881,14 +881,14 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) - return 0; + return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); - return copied; + return true; } -static size_t iomap_write_end_inline(const struct iomap_iter *iter, +static void iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; @@ -903,20 +903,31 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter, kunmap_local(addr); mark_inode_dirty(iter->inode); - return copied; } -/* Returns the number of bytes copied. May be 0. Cannot be an errno. */ -static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, +/* + * Returns true if all copied bytes have been written to the pagecache, + * otherwise return false. + */ +static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); - if (srcmap->type == IOMAP_INLINE) - return iomap_write_end_inline(iter, folio, pos, copied); - if (srcmap->flags & IOMAP_F_BUFFER_HEAD) - return block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); + if (srcmap->type == IOMAP_INLINE) { + iomap_write_end_inline(iter, folio, pos, copied); + return true; + } + + if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + size_t bh_written; + + bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(bh_written != copied && bh_written != 0); + return bh_written == copied; + } + return __iomap_write_end(iter->inode, pos, len, copied, folio); } @@ -981,7 +992,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - written = iomap_write_end(iter, pos, bytes, copied, folio); + written = iomap_write_end(iter, pos, bytes, copied, folio) ? + copied : 0; /* * Update the in-memory inode size after copying the data into @@ -1362,6 +1374,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) @@ -1373,9 +1386,9 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, pos, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); @@ -1428,6 +1441,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (status) @@ -1442,9 +1456,9 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); + ret = iomap_write_end(iter, pos, bytes, bytes, folio); __iomap_put_folio(iter, pos, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + if (WARN_ON_ONCE(!ret)) return -EIO; pos += bytes; From 5388755d2ec96261e3c7e14560304cfb2fbf8977 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 20 Mar 2024 19:05:48 +0800 Subject: [PATCH 6/7] iomap: do some small logical cleanup in buffered write mainline inclusion from mainline-v6.10-rc1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA -------------------------------- Since iomap_write_end() can never return a partial write length, the comparison between written, copied and bytes becomes useless, just merge them with the unwritten branch. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240320110548.2200662-10-yi.zhang@huaweicloud.com Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner (cherry picked from commit e1f453d4336d5d7fbbd1910532201b4a07a20a5c) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a92ebb0f64b1b..839c02e51833d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1011,11 +1011,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (written < bytes) - iomap_write_failed(iter->inode, pos + written, - bytes - written); - if (unlikely(copied != written)) - iov_iter_revert(i, copied - written); cond_resched(); if (unlikely(written == 0)) { @@ -1025,6 +1020,9 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * halfway through, might be a race with munmap, * might be severe memory pressure. */ + iomap_write_failed(iter->inode, pos, bytes); + iov_iter_revert(i, copied); + if (chunk > PAGE_SIZE) chunk /= 2; if (copied) { From 8806234aea9e02ce33fc7909b7da4d1c00370ab2 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 3 Jun 2024 19:22:22 +0800 Subject: [PATCH 7/7] iomap: keep on increasing i_size in iomap_write_end() mainline inclusion from mainline-v6.10-rc4 category: bugfix Commit '943bc0882ceb ("iomap: don't increase i_size if it's not a write operation")' breaks xfs with realtime device on generic/561, the problem is when unaligned truncate down a xfs realtime inode with rtextsize > 1 fs block, xfs only zero out the EOF block but doesn't zero out the tail blocks that aligned to rtextsize, so if we don't increase i_size in iomap_write_end(), it could expose stale data after we do an append write beyond the aligned EOF block. xfs should zero out the tail blocks when truncate down, but before we finish that, let's fix the issue by just revert the changes in iomap_write_end(). Fixes: 943bc0882ceb ("iomap: don't increase i_size if it's not a write operation") Reported-by: Chandan Babu R Link: https://lore.kernel.org/linux-xfs/0b92a215-9d9b-3788-4504-a520778953c2@huaweicloud.com Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20240603112222.2109341-1-yi.zhang@huaweicloud.com Tested-by: Chandan Babu R Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (cherry picked from commit 0841ea4a3b416554be401a91aa267b7de838de8b) Signed-off-by: Wentao Guan --- fs/iomap/buffered-io.c | 53 +++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 839c02e51833d..6267b2f4af964 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -913,22 +913,37 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); + loff_t old_size = iter->inode->i_size; + size_t written; if (srcmap->type == IOMAP_INLINE) { iomap_write_end_inline(iter, folio, pos, copied); - return true; + written = copied; + } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { + written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(written != copied && written != 0); + } else { + written = __iomap_write_end(iter->inode, pos, len, copied, + folio) ? copied : 0; } - if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - size_t bh_written; - - bh_written = block_write_end(NULL, iter->inode->i_mapping, pos, - len, copied, &folio->page, NULL); - WARN_ON_ONCE(bh_written != copied && bh_written != 0); - return bh_written == copied; + /* + * Update the in-memory inode size after copying the data into the page + * cache. It's up to the file system to write the updated size to disk, + * preferably after I/O completion so that no stale data is exposed. + * Only once that's done can we unlock and release the folio. + */ + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } + __iomap_put_folio(iter, pos, written, folio); - return __iomap_write_end(iter->inode, pos, len, copied, folio); + if (old_size < pos) + pagecache_isize_extended(iter->inode, old_size, pos); + + return written == copied; } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) @@ -943,7 +958,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) do { struct folio *folio; - loff_t old_size; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ @@ -995,23 +1009,6 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) written = iomap_write_end(iter, pos, bytes, copied, folio) ? copied : 0; - /* - * Update the in-memory inode size after copying the data into - * the page cache. It's up to the file system to write the - * updated size to disk, preferably after I/O completion so that - * no stale data is exposed. Only once that's done can we - * unlock and release the folio. - */ - old_size = iter->inode->i_size; - if (pos + written > old_size) { - i_size_write(iter->inode, pos + written); - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; - } - __iomap_put_folio(iter, pos, written, folio); - - if (old_size < pos) - pagecache_isize_extended(iter->inode, old_size, pos); - cond_resched(); if (unlikely(written == 0)) { /* @@ -1385,7 +1382,6 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) bytes = folio_size(folio) - offset; ret = iomap_write_end(iter, pos, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; @@ -1455,7 +1451,6 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_mark_accessed(folio); ret = iomap_write_end(iter, pos, bytes, bytes, folio); - __iomap_put_folio(iter, pos, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO;