Skip to content
82 changes: 46 additions & 36 deletions fs/iomap/buffered-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -860,12 +860,11 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,

out_unlock:
__iomap_put_folio(iter, pos, 0, folio);
iomap_write_failed(iter->inode, pos, len);

return status;
}

static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
flush_dcache_folio(folio);
Expand All @@ -882,14 +881,14 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
* redo the whole thing.
*/
if (unlikely(copied < len && !folio_test_uptodate(folio)))
return 0;
return false;
iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
filemap_dirty_folio(inode->i_mapping, folio);
return copied;
return true;
}

static size_t iomap_write_end_inline(const struct iomap_iter *iter,
static void iomap_write_end_inline(const struct iomap_iter *iter,
struct folio *folio, loff_t pos, size_t copied)
{
const struct iomap *iomap = &iter->iomap;
Expand All @@ -904,49 +903,54 @@ static size_t iomap_write_end_inline(const struct iomap_iter *iter,
kunmap_local(addr);

mark_inode_dirty(iter->inode);
return copied;
}

/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
/*
* Returns true if all copied bytes have been written to the pagecache,
* otherwise returns false.
*/
static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
loff_t old_size = iter->inode->i_size;
size_t ret;
size_t written;

if (srcmap->type == IOMAP_INLINE) {
ret = iomap_write_end_inline(iter, folio, pos, copied);
iomap_write_end_inline(iter, folio, pos, copied);
written = copied;
} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
copied, &folio->page, NULL);
written = block_write_end(NULL, iter->inode->i_mapping, pos,
len, copied, &folio->page, NULL);
WARN_ON_ONCE(written != copied && written != 0);
} else {
ret = __iomap_write_end(iter->inode, pos, len, copied, folio);
written = __iomap_write_end(iter->inode, pos, len, copied,
folio) ? copied : 0;
}

/*
* Update the in-memory inode size after copying the data into the page
* cache. It's up to the file system to write the updated size to disk,
* preferably after I/O completion so that no stale data is exposed.
* Only once that's done can we unlock and release the folio.
*/
if (pos + ret > old_size) {
i_size_write(iter->inode, pos + ret);
if (pos + written > old_size) {
i_size_write(iter->inode, pos + written);
iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
}
__iomap_put_folio(iter, pos, ret, folio);
__iomap_put_folio(iter, pos, written, folio);

if (old_size < pos)
pagecache_isize_extended(iter->inode, old_size, pos);
if (ret < len)
iomap_write_failed(iter->inode, pos + ret, len - ret);
return ret;

return written == copied;
Comment on lines 943 to +946
}

static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
loff_t length = iomap_length(iter);
loff_t pos = iter->pos;
ssize_t written = 0;
ssize_t total_written = 0;
long status = 0;
struct address_space *mapping = iter->inode->i_mapping;
size_t chunk = mapping_max_folio_size(mapping);
Expand All @@ -957,6 +961,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
size_t written; /* Bytes have been written */

bytes = iov_iter_count(i);
retry:
Expand Down Expand Up @@ -986,8 +991,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
}

status = iomap_write_begin(iter, pos, bytes, &folio);
if (unlikely(status))
if (unlikely(status)) {
iomap_write_failed(iter->inode, pos, bytes);
break;
}
if (iter->iomap.flags & IOMAP_F_STALE)
break;

Expand All @@ -999,37 +1006,38 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
flush_dcache_folio(folio);

copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
status = iomap_write_end(iter, pos, bytes, copied, folio);

if (unlikely(copied != status))
iov_iter_revert(i, copied - status);
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
copied : 0;

cond_resched();
if (unlikely(status == 0)) {
if (unlikely(written == 0)) {
/*
* A short copy made iomap_write_end() reject the
* thing entirely. Might be memory poisoning
* halfway through, might be a race with munmap,
* might be severe memory pressure.
*/
iomap_write_failed(iter->inode, pos, bytes);
iov_iter_revert(i, copied);

if (chunk > PAGE_SIZE)
chunk /= 2;
if (copied) {
bytes = copied;
goto retry;
}
} else {
pos += status;
written += status;
length -= status;
pos += written;
total_written += written;
length -= written;
}
} while (iov_iter_count(i) && length);

if (status == -EAGAIN) {
iov_iter_revert(i, written);
iov_iter_revert(i, total_written);
return -EAGAIN;
}
return written ? written : status;
return total_written ? total_written : status;
}

ssize_t
Expand Down Expand Up @@ -1361,6 +1369,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
int status;
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
bool ret;

status = iomap_write_begin(iter, pos, bytes, &folio);
if (unlikely(status))
Expand All @@ -1372,8 +1381,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
if (bytes > folio_size(folio) - offset)
bytes = folio_size(folio) - offset;

bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
if (WARN_ON_ONCE(bytes == 0))
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;

cond_resched();
Expand Down Expand Up @@ -1426,6 +1435,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
int status;
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
bool ret;

status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
Expand All @@ -1440,8 +1450,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
folio_zero_range(folio, offset, bytes);
folio_mark_accessed(folio);

bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
if (WARN_ON_ONCE(bytes == 0))
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
if (WARN_ON_ONCE(!ret))
return -EIO;

pos += bytes;
Expand Down Expand Up @@ -1819,7 +1829,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
error = wpc->ops->map_blocks(wpc, inode, pos, dirty_len);
if (error)
break;
trace_iomap_writepage_map(inode, &wpc->iomap);
trace_iomap_writepage_map(inode, pos, dirty_len, &wpc->iomap);

map_len = min_t(u64, dirty_len,
wpc->iomap.offset + wpc->iomap.length - pos);
Expand Down
43 changes: 42 additions & 1 deletion fs/iomap/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,48 @@ DEFINE_EVENT(iomap_class, name, \
TP_ARGS(inode, iomap))
DEFINE_IOMAP_EVENT(iomap_iter_dstmap);
DEFINE_IOMAP_EVENT(iomap_iter_srcmap);
DEFINE_IOMAP_EVENT(iomap_writepage_map);

/*
 * iomap_writepage_map - trace a mapping being used for writeback.
 *
 * Takes the inode, the position and dirty length supplied by the caller,
 * and the iomap in use.  Records the inode's superblock device and inode
 * number, the caller's pos/dirty_len, and the iomap's addr, offset, length,
 * type, flags and backing block device.
 *
 * Numeric values are printed in hex; type and flags are decoded through
 * IOMAP_TYPE_STRINGS and IOMAP_F_FLAGS_STRINGS.
 */
TRACE_EVENT(iomap_writepage_map,
TP_PROTO(struct inode *inode, u64 pos, unsigned int dirty_len,
struct iomap *iomap),
TP_ARGS(inode, pos, dirty_len, iomap),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(u64, ino)
__field(u64, pos)
/* stored as u64 although passed as unsigned int; matches 0x%llx below */
__field(u64, dirty_len)
__field(u64, addr)
__field(loff_t, offset)
__field(u64, length)
__field(u16, type)
__field(u16, flags)
__field(dev_t, bdev)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->dirty_len = dirty_len;
__entry->addr = iomap->addr;
__entry->offset = iomap->offset;
__entry->length = iomap->length;
__entry->type = iomap->type;
__entry->flags = iomap->flags;
/* the iomap may have no block device attached; record 0 in that case */
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
),
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
"addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
MAJOR(__entry->bdev), MINOR(__entry->bdev),
__entry->pos,
__entry->dirty_len,
__entry->addr,
__entry->offset,
__entry->length,
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
);

TRACE_EVENT(iomap_iter,
TP_PROTO(struct iomap_iter *iter, const void *ops,
Expand Down
Loading