nushell · fdncred · May 9, 2026 · May 8, 2026
diff --git a/src/core_editor/graphemes.rs b/src/core_editor/graphemes.rs
@@ -0,0 +1,141 @@
+use unicode_segmentation::UnicodeSegmentation;
+
+/// Byte index just past the first grapheme cluster of `buf[pos..]`.
+///
+/// Returns `buf.len()` when that cluster is the last one or `pos == buf.len()`.
+///
+/// # Panics
+///
+/// Panics if `pos` exceeds `buf.len()` or is not on a UTF-8 character boundary.
+pub fn next_grapheme_boundary(buf: &str, pos: usize) -> usize {
+    let tail = &buf[pos..];
+    // The first cluster of `tail` starts at `pos`, so its byte length is the
+    // distance to the next boundary (zero when `tail` is empty).
+    let step = tail.graphemes(true).next().map_or(0, str::len);
+    pos + step
+}
+
+/// Byte index at which the last grapheme cluster of `buf[..pos]` starts.
+///
+/// Returns `0` when `pos` is `0`, i.e. nothing precedes it.
+///
+/// # Panics
+///
+/// Panics if `pos` exceeds `buf.len()` or is not on a UTF-8 character boundary.
+pub fn prev_grapheme_boundary(buf: &str, pos: usize) -> usize {
+    let head = &buf[..pos];
+    // The last cluster of `head` ends at `pos`, so stepping back by its byte
+    // length lands on its start (no step when `head` is empty).
+    let step = head.graphemes(true).next_back().map_or(0, str::len);
+    pos - step
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // --- next_grapheme_boundary ---------------------------------------------
+
+    #[test]
+    fn next_advances_one_ascii_char() {
+        assert_eq!(next_grapheme_boundary("abc", 0), 1);
+    }
+
+    #[test]
+    fn next_returns_buf_len_when_at_end() {
+        assert_eq!(next_grapheme_boundary("abc", 3), 3);
+    }
+
+    #[test]
+    fn next_on_empty_buffer_returns_zero() {
+        assert_eq!(next_grapheme_boundary("", 0), 0);
+    }
+
+    #[test]
+    fn next_skips_two_byte_utf8_grapheme() {
+        assert_eq!(next_grapheme_boundary("café!", 3), 5);
+    }
+
+    #[test]
+    fn next_from_last_grapheme_returns_buf_len() {
+        // `é` starts at byte 3 and is the last grapheme, so the result is the end
+        assert_eq!(next_grapheme_boundary("café", 3), "café".len());
+    }
+
+    #[test]
+    fn next_treats_combining_mark_as_single_grapheme() {
+        assert_eq!(next_grapheme_boundary("e\u{0301}", 0), 3);
+    }
+
+    #[test]
+    fn next_advances_one_cjk_char() {
+        assert_eq!(next_grapheme_boundary("日本", 0), 3);
+    }
+
+    #[test]
+    fn next_skips_zwj_emoji_sequence_as_one() {
+        // family-emoji + `!`. From 0, skip the whole 18-byte sequence and land on `!`
+        let buf = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}!";
+        assert_eq!(next_grapheme_boundary(buf, 0), buf.len() - 1);
+    }
+
+    // --- prev_grapheme_boundary ---------------------------------------------
+
+    #[test]
+    fn prev_retreats_one_ascii_char() {
+        assert_eq!(prev_grapheme_boundary("abc", 2), 1);
+    }
+
+    #[test]
+    fn prev_at_zero_returns_zero() {
+        assert_eq!(prev_grapheme_boundary("abc", 0), 0);
+    }
+
+    #[test]
+    fn prev_retreats_past_two_byte_utf8_grapheme() {
+        // from byte 5 (end of "café") retreat past `é` to byte 3 (its start)
+        let buf = "café";
+        assert_eq!(prev_grapheme_boundary(buf, buf.len()), 3);
+    }
+
+    #[test]
+    fn prev_retreats_past_combining_mark() {
+        // 'a' + combined 'é' (3 bytes). From end, retreat past combined grapheme to byte 1
+        let buf = "ae\u{0301}";
+        assert_eq!(prev_grapheme_boundary(buf, buf.len()), 1);
+    }
+
+    #[test]
+    fn prev_retreats_past_zwj_emoji_sequence() {
+        // 'a' + family-emoji (18 bytes). From end, retreat past the family to byte 1
+        let buf = "a\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}";
+        assert_eq!(prev_grapheme_boundary(buf, buf.len()), 1);
+    }
+
+    // --- round-trip ----------------------------------------------------------
+
+    /// Asserts that stepping right and then left from every grapheme start of
+    /// `buf` comes back to that start.
+    fn assert_round_trip(buf: &str) {
+        for (pos, _) in buf.grapheme_indices(true) {
+            let there = next_grapheme_boundary(buf, pos);
+            assert_eq!(
+                prev_grapheme_boundary(buf, there),
+                pos,
+                "round-trip failed at pos {pos}"
+            );
+        }
+    }
+
+    #[test]
+    fn next_then_prev_returns_to_origin_for_ascii() {
+        assert_round_trip("abc");
+    }
+
+    #[test]
+    fn next_then_prev_returns_to_origin_for_unicode() {
+        // mix ASCII, multi-byte, combining mark, and ZWJ emoji
+        let buf = "a日e\u{0301}\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}";
+        assert_round_trip(buf);
+    }
+}
diff --git a/src/core_editor/line_buffer.rs b/src/core_editor/line_buffer.rs
@@ -1,4 +1,5 @@
 use {
+    crate::core_editor::graphemes::{next_grapheme_boundary, prev_grapheme_boundary},
     itertools::Itertools,
     std::{convert::From, ops::Range},
     unicode_segmentation::UnicodeSegmentation,
@@ -174,20 +175,12 @@ impl LineBuffer {
 
     /// Cursor position *behind* the next unicode grapheme to the right from the given position
     pub fn grapheme_right_index_from_pos(&self, pos: usize) -> usize {
-        self.lines[pos..]
-            .grapheme_indices(true)
-            .nth(1)
-            .map(|(i, _)| pos + i)
-            .unwrap_or_else(|| self.lines.len())
+        next_grapheme_boundary(&self.lines, pos)
     }
 
     /// Cursor position *behind* the previous unicode grapheme to the left from the given position
     pub(crate) fn grapheme_left_index_from_pos(&self, pos: usize) -> usize {
-        self.lines[..pos]
-            .grapheme_indices(true)
-            .next_back()
-            .map(|(i, _)| i)
-            .unwrap_or(0)
+        prev_grapheme_boundary(&self.lines, pos)
     }
 
     /// Cursor position *behind* the next word to the right
@@ -223,13 +216,7 @@ impl LineBuffer {
                     .map(|x| self.insertion_point + x.0 + i)
                     .filter(|x| !is_whitespace_str(word) && *x != self.insertion_point)
             })
-            .unwrap_or_else(|| {
-                self.lines
-                    .grapheme_indices(true)
-                    .next_back()
-                    .map(|x| x.0)
-                    .unwrap_or(0)
-            })
+            .unwrap_or_else(|| prev_grapheme_boundary(&self.lines, self.lines.len()))
     }
 
     /// Cursor position *at end of* the next WORD to the right
@@ -248,13 +235,7 @@ impl LineBuffer {
                     None
                 }
             })
-            .unwrap_or_else(|| {
-                self.lines
-                    .grapheme_indices(true)
-                    .next_back()
-                    .map(|x| x.0)
-                    .unwrap_or(0)
-            })
+            .unwrap_or_else(|| prev_grapheme_boundary(&self.lines, self.lines.len()))
     }
 
     /// Cursor position *in front of* the next word to the right

diff --git a/src/core_editor/mod.rs b/src/core_editor/mod.rs
@@ -1,6 +1,7 @@
 mod clip_buffer;
 mod edit_stack;
 mod editor;
+mod graphemes;
 mod line_buffer;
 
 #[cfg(feature = "system_clipboard")]