Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions src/core_editor/graphemes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
use unicode_segmentation::UnicodeSegmentation;

/// Byte index of the grapheme boundary that follows the grapheme starting at `pos`.
///
/// The result is the end of the first grapheme found in `buf[pos..]`, so it is
/// strictly greater than `pos` whenever any text remains. If that grapheme is the
/// last one in `buf`, or if `pos == buf.len()`, the result is `buf.len()`.
///
/// Only the tail `buf[pos..]` is segmented; text before `pos` is not consulted.
///
/// # Panics
///
/// Panics if `pos` is greater than `buf.len()` or is not on a UTF-8 character
/// boundary in `buf`.
pub fn next_grapheme_boundary(buf: &str, pos: usize) -> usize {
    // The first grapheme of the tail starts at offset 0, so its byte length is
    // exactly the distance to the next boundary. This avoids segmenting a
    // second grapheme just to read its start offset.
    match buf[pos..].grapheme_indices(true).next() {
        Some((_, grapheme)) => pos + grapheme.len(),
        None => buf.len(),
    }
}

/// Byte index of the previous grapheme boundary before `pos`.
///
/// Returns `0` if there is no grapheme before `pos`.
///
/// # Panics
///
/// Panics if `pos` is not on a UTF-8 character boundary in `buf`.
pub fn prev_grapheme_boundary(buf: &str, pos: usize) -> usize {
buf[..pos]
.grapheme_indices(true)
.next_back()
.map(|(i, _)| i)
.unwrap_or(0)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Family emoji: man + ZWJ + woman + ZWJ + girl (4 + 3 + 4 + 3 + 4 = 18 bytes).
    ///
    /// Spelled with escapes so the invisible zero-width joiners cannot be lost
    /// when the file is copied or reformatted.
    const FAMILY: &str = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}";

    /// Asserts that stepping forward and then backward from every grapheme
    /// start in `buf` lands on that same start again.
    fn assert_round_trip(buf: &str) {
        for (pos, _) in buf.grapheme_indices(true) {
            assert_eq!(
                prev_grapheme_boundary(buf, next_grapheme_boundary(buf, pos)),
                pos,
                "round-trip failed at pos {pos}"
            );
        }
    }

    // --- next_grapheme_boundary ---------------------------------------------

    #[test]
    fn next_advances_one_ascii_char() {
        assert_eq!(next_grapheme_boundary("abc", 0), 1);
    }

    #[test]
    fn next_returns_buf_len_when_at_end() {
        assert_eq!(next_grapheme_boundary("abc", 3), 3);
    }

    #[test]
    fn next_on_empty_buffer_returns_zero() {
        assert_eq!(next_grapheme_boundary("", 0), 0);
    }

    #[test]
    fn next_skips_two_byte_utf8_grapheme() {
        // `é` occupies bytes 3..5, so the boundary after it is byte 5 (the `!`)
        assert_eq!(next_grapheme_boundary("café!", 3), 5);
    }

    #[test]
    fn next_from_last_grapheme_returns_buf_len() {
        // byte 3 is the start of the final grapheme `é`, not the end of the buffer
        let buf = "café";
        assert_eq!(next_grapheme_boundary(buf, 3), buf.len());
    }

    #[test]
    fn next_treats_combining_mark_as_single_grapheme() {
        // 'e' (1 byte) + U+0301 combining acute (2 bytes) form one grapheme
        assert_eq!(next_grapheme_boundary("e\u{0301}", 0), 3);
    }

    #[test]
    fn next_advances_one_cjk_char() {
        assert_eq!(next_grapheme_boundary("日本", 0), 3);
    }

    #[test]
    fn next_skips_zwj_emoji_sequence_as_one() {
        // family-emoji + `!`. From 0, skip the whole 18-byte sequence and land on `!`
        let buf = format!("{FAMILY}!");
        assert_eq!(FAMILY.len(), 18);
        assert_eq!(next_grapheme_boundary(&buf, 0), FAMILY.len());
    }

    #[test]
    #[should_panic]
    fn next_panics_inside_multibyte_char() {
        // byte 4 is the second byte of `é`, which is not a char boundary
        let _ = next_grapheme_boundary("café", 4);
    }

    // --- prev_grapheme_boundary ---------------------------------------------

    #[test]
    fn prev_retreats_one_ascii_char() {
        assert_eq!(prev_grapheme_boundary("abc", 2), 1);
    }

    #[test]
    fn prev_at_zero_returns_zero() {
        assert_eq!(prev_grapheme_boundary("abc", 0), 0);
    }

    #[test]
    fn prev_on_empty_buffer_returns_zero() {
        assert_eq!(prev_grapheme_boundary("", 0), 0);
    }

    #[test]
    fn prev_retreats_past_two_byte_utf8_grapheme() {
        // from byte 5 (end of "café") retreat past `é` to byte 3 (its start)
        let buf = "café";
        assert_eq!(prev_grapheme_boundary(buf, buf.len()), 3);
    }

    #[test]
    fn prev_retreats_past_combining_mark() {
        // 'a' + combined 'é' (3 bytes). From end, retreat past combined grapheme to byte 1
        let buf = "ae\u{0301}";
        assert_eq!(prev_grapheme_boundary(buf, buf.len()), 1);
    }

    #[test]
    fn prev_retreats_past_zwj_emoji_sequence() {
        // 'a' + family-emoji (18 bytes). From end, retreat past the family to byte 1
        let buf = format!("a{FAMILY}");
        assert_eq!(prev_grapheme_boundary(&buf, buf.len()), 1);
    }

    #[test]
    #[should_panic]
    fn prev_panics_when_pos_exceeds_len() {
        let _ = prev_grapheme_boundary("abc", 4);
    }

    // --- round-trip ----------------------------------------------------------

    #[test]
    fn next_then_prev_returns_to_origin_for_ascii() {
        assert_round_trip("abc");
    }

    #[test]
    fn next_then_prev_returns_to_origin_for_unicode() {
        // mix ASCII, multi-byte, combining mark, and ZWJ emoji
        let buf = format!("a日e\u{0301}{FAMILY}");
        assert_round_trip(&buf);
    }
}
29 changes: 5 additions & 24 deletions src/core_editor/line_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use {
crate::core_editor::graphemes::{next_grapheme_boundary, prev_grapheme_boundary},
itertools::Itertools,
std::{convert::From, ops::Range},
unicode_segmentation::UnicodeSegmentation,
Expand Down Expand Up @@ -174,20 +175,12 @@ impl LineBuffer {

/// Cursor position *behind* the next unicode grapheme to the right from the given position
///
/// Thin wrapper that forwards `self.lines` and `pos` to
/// [`next_grapheme_boundary`]; the inline segmentation that used to live here
/// was moved into that helper.
pub fn grapheme_right_index_from_pos(&self, pos: usize) -> usize {
    next_grapheme_boundary(&self.lines, pos)
}

/// Cursor position *behind* the previous unicode grapheme to the left from the given position
///
/// Thin wrapper that forwards `self.lines` and `pos` to
/// [`prev_grapheme_boundary`]; the inline segmentation that used to live here
/// was moved into that helper.
pub(crate) fn grapheme_left_index_from_pos(&self, pos: usize) -> usize {
    prev_grapheme_boundary(&self.lines, pos)
}

/// Cursor position *behind* the next word to the right
Expand Down Expand Up @@ -223,13 +216,7 @@ impl LineBuffer {
.map(|x| self.insertion_point + x.0 + i)
.filter(|x| !is_whitespace_str(word) && *x != self.insertion_point)
})
.unwrap_or_else(|| {
self.lines
.grapheme_indices(true)
.next_back()
.map(|x| x.0)
.unwrap_or(0)
})
.unwrap_or_else(|| prev_grapheme_boundary(&self.lines, self.lines.len()))
}

/// Cursor position *at end of* the next WORD to the right
Expand All @@ -248,13 +235,7 @@ impl LineBuffer {
None
}
})
.unwrap_or_else(|| {
self.lines
.grapheme_indices(true)
.next_back()
.map(|x| x.0)
.unwrap_or(0)
})
.unwrap_or_else(|| prev_grapheme_boundary(&self.lines, self.lines.len()))
}

/// Cursor position *in front of* the next word to the right
Expand Down
1 change: 1 addition & 0 deletions src/core_editor/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod clip_buffer;
mod edit_stack;
mod editor;
mod graphemes;
mod line_buffer;

#[cfg(feature = "system_clipboard")]
Expand Down
Loading