diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 30ef5a73bb65..6267b2f4af96 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -860,12 +860,11 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, out_unlock: __iomap_put_folio(iter, pos, 0, folio); - iomap_write_failed(iter->inode, pos, len); return status; } -static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, +static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); @@ -882,14 +881,14 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) - return 0; + return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); - return copied; + return true; } -static size_t iomap_write_end_inline(const struct iomap_iter *iter, +static void iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; @@ -904,49 +903,54 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter, kunmap_local(addr); mark_inode_dirty(iter->inode); - return copied; } -/* Returns the number of bytes copied. May be 0. Cannot be an errno. */ -static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, +/* + * Returns true if all copied bytes have been written to the pagecache, + * otherwise return false. + */ +static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t old_size = iter->inode->i_size; - size_t ret; + size_t written; if (srcmap->type == IOMAP_INLINE) { - ret = iomap_write_end_inline(iter, folio, pos, copied); + iomap_write_end_inline(iter, folio, pos, copied); + written = copied; } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len, - copied, &folio->page, NULL); + written = block_write_end(NULL, iter->inode->i_mapping, pos, + len, copied, &folio->page, NULL); + WARN_ON_ONCE(written != copied && written != 0); } else { - ret = __iomap_write_end(iter->inode, pos, len, copied, folio); + written = __iomap_write_end(iter->inode, pos, len, copied, + folio) ? copied : 0; } /* * Update the in-memory inode size after copying the data into the page * cache. It's up to the file system to write the updated size to disk, * preferably after I/O completion so that no stale data is exposed. + * Only once that's done can we unlock and release the folio. */ - if (pos + ret > old_size) { - i_size_write(iter->inode, pos + ret); + if (pos + written > old_size) { + i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } - __iomap_put_folio(iter, pos, ret, folio); + __iomap_put_folio(iter, pos, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); - if (ret < len) - iomap_write_failed(iter->inode, pos + ret, len - ret); - return ret; + + return written == copied; } static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) { loff_t length = iomap_length(iter); loff_t pos = iter->pos; - ssize_t written = 0; + ssize_t total_written = 0; long status = 0; struct address_space *mapping = iter->inode->i_mapping; size_t chunk = mapping_max_folio_size(mapping); @@ -957,6 +961,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ + size_t written; /* Bytes have been written */ bytes = iov_iter_count(i); retry: @@ -986,8 +991,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) } status = iomap_write_begin(iter, pos, bytes, &folio); - if (unlikely(status)) + if (unlikely(status)) { + iomap_write_failed(iter->inode, pos, bytes); break; + } if (iter->iomap.flags & IOMAP_F_STALE) break; @@ -999,19 +1006,20 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); - status = iomap_write_end(iter, pos, bytes, copied, folio); - - if (unlikely(copied != status)) - iov_iter_revert(i, copied - status); + written = iomap_write_end(iter, pos, bytes, copied, folio) ? + copied : 0; cond_resched(); - if (unlikely(status == 0)) { + if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ + iomap_write_failed(iter->inode, pos, bytes); + iov_iter_revert(i, copied); + if (chunk > PAGE_SIZE) chunk /= 2; if (copied) { @@ -1019,17 +1027,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) goto retry; } } else { - pos += status; - written += status; - length -= status; + pos += written; + total_written += written; + length -= written; } } while (iov_iter_count(i) && length); if (status == -EAGAIN) { - iov_iter_revert(i, written); + iov_iter_revert(i, total_written); return -EAGAIN; } - return written ? written : status; + return total_written ? total_written : status; } ssize_t @@ -1361,6 +1369,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) @@ -1372,8 +1381,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + ret = iomap_write_end(iter, pos, bytes, bytes, folio); + if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); @@ -1426,6 +1435,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) int status; size_t offset; size_t bytes = min_t(u64, SIZE_MAX, length); + bool ret; status = iomap_write_begin(iter, pos, bytes, &folio); if (status) @@ -1440,8 +1450,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); - bytes = iomap_write_end(iter, pos, bytes, bytes, folio); - if (WARN_ON_ONCE(bytes == 0)) + ret = iomap_write_end(iter, pos, bytes, bytes, folio); + if (WARN_ON_ONCE(!ret)) return -EIO; pos += bytes; @@ -1819,7 +1829,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len); if (error) break; - trace_iomap_writepage_map(inode, &wpc->iomap); + trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap); map_len = min_t(u64, dirty_len, wpc->iomap.offset + wpc->iomap.length - pos); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index c16fd55f5595..3ef694f9489f 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \ TP_ARGS(inode, iomap)) DEFINE_IOMAP_EVENT(iomap_iter_dstmap); DEFINE_IOMAP_EVENT(iomap_iter_srcmap); -DEFINE_IOMAP_EVENT(iomap_writepage_map); + +TRACE_EVENT(iomap_writepage_map, + TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len, + struct iomap *iomap), + TP_ARGS(inode, pos, dirty_len, iomap), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, ino) + __field(u64, pos) + __field(u64, dirty_len) + __field(u64, addr) + __field(loff_t, offset) + __field(u64, length) + __field(u16, type) + __field(u16, flags) + __field(dev_t, bdev) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->dirty_len = dirty_len; + __entry->addr = iomap->addr; + __entry->offset = iomap->offset; + __entry->length = iomap->length; + __entry->type = iomap->type; + __entry->flags = iomap->flags; + __entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0; + ), + TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx " + "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + MAJOR(__entry->bdev), MINOR(__entry->bdev), + __entry->pos, + __entry->dirty_len, + __entry->addr, + __entry->offset, + __entry->length, + __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS), + __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS)) +); TRACE_EVENT(iomap_iter, TP_PROTO(struct iomap_iter *iter, const void *ops,