@@ -638,7 +638,26 @@ TEST(memory_search_long_text_sections) {
638638 matched ++ ;
639639 }
640640
641- printf ("(%d/%d sections retrieved) " , matched , n_cases );
641+ // Surface aggregate per-chunk metadata for the underlying long-text
642+ // corpus (one row in dbmem_content, multiple chunks in dbmem_vault).
643+ char long_text_hash [DBMEM_HASH_STR_MAXLEN ] = {0 };
644+ sqlite3_stmt * hstmt = NULL ;
645+ int hrc = sqlite3_prepare_v2 (db ,
646+ "SELECT hash FROM dbmem_content WHERE context = 'long-text' LIMIT 1;" ,
647+ -1 , & hstmt , NULL );
648+ int chunk_count = 0 , min_tokens = 0 , min_truncated = 0 , max_truncated = 0 ;
649+ if (hrc == SQLITE_OK && sqlite3_step (hstmt ) == SQLITE_ROW ) {
650+ snprintf (long_text_hash , sizeof (long_text_hash ), "%s" ,
651+ (const char * )sqlite3_column_text (hstmt , 0 ));
652+ sqlite3_finalize (hstmt );
653+ get_vault_metadata (long_text_hash , & chunk_count , & min_tokens ,
654+ & min_truncated , & max_truncated );
655+ } else {
656+ if (hstmt ) sqlite3_finalize (hstmt );
657+ }
658+
659+ printf ("(%d/%d sections retrieved; %d chunks min_n_tok=%d any_trunc=%d) " ,
660+ matched , n_cases , chunk_count , min_tokens , max_truncated );
642661
643662 ASSERT_SQL_OK (db , "SELECT memory_set_option('min_score', 0.7);" );
644663}
@@ -766,17 +785,24 @@ TEST(memory_search_under_token_limit) {
766785 ASSERT (long_chunk_bytes > 4500 );
767786
768787 int chunk_count = 0 , min_tokens = 0 , min_truncated = 0 , max_truncated = 0 ;
788+ int short_n_tokens = 0 , short_truncated = 0 ;
789+ int long_n_tokens = 0 , long_truncated = 0 ;
790+
769791 rc = get_vault_metadata (short_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
770792 ASSERT (rc == SQLITE_OK );
771793 ASSERT (chunk_count == 1 );
772794 ASSERT (min_tokens > 0 );
773795 ASSERT (min_truncated == 0 && max_truncated == 0 );
796+ short_n_tokens = min_tokens ;
797+ short_truncated = max_truncated ;
774798
775799 rc = get_vault_metadata (long_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
776800 ASSERT (rc == SQLITE_OK );
777801 ASSERT (chunk_count == 1 );
778802 ASSERT (min_tokens > 0 );
779803 ASSERT (min_truncated == 0 && max_truncated == 0 );
804+ long_n_tokens = min_tokens ;
805+ long_truncated = max_truncated ;
780806
781807 // Same query as the truncation test; with the full chunk embedded we
782808 // expect both the short ref and the long chunk to surface in top-10.
@@ -805,8 +831,9 @@ TEST(memory_search_under_token_limit) {
805831 ASSERT (short_rank >= 0 );
806832 ASSERT (long_rank >= 0 );
807833
808- printf ("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) " ,
809- long_chunk_bytes , short_rank , short_score , long_rank , long_score );
834+ printf ("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) " ,
835+ short_n_tokens , short_truncated , short_rank , short_score ,
836+ long_chunk_bytes , long_n_tokens , long_truncated , long_rank , long_score );
810837
811838 ASSERT_SQL_OK (db , "SELECT memory_set_option('skip_semantic', 0);" );
812839 ASSERT_SQL_OK (db , "SELECT memory_set_option('max_tokens', 400);" );
@@ -845,11 +872,14 @@ TEST(memory_search_truncation_signature) {
845872 ASSERT_SQL_OK (db , "SELECT memory_set_option('min_score', 0.0);" );
846873
847874 // Short reference (~50 tokens), fully embedded, entirely about the topic.
875+ // Trailing sentence differs per test so memory_add_text's content-hash
876+ // idempotency doesn't collapse this insert into a no-op of an earlier
877+ // test's identical SHORT_REF.
848878 static const char * SHORT_REF =
849879 "Hydrothermal vents on the deep ocean floor sustain chemosynthetic "
850880 "microbial ecosystems independent of sunlight. Tubeworms and "
851881 "thermophilic archaea metabolize sulfur compounds emitted by the "
852- "vent fluids in total darkness." ;
882+ "vent fluids in total darkness. Truncation-signature reference. " ;
853883
854884 sqlite3_stmt * stmt = NULL ;
855885 int rc = sqlite3_prepare_v2 (db ,
@@ -945,17 +975,24 @@ TEST(memory_search_truncation_signature) {
945975 ASSERT (long_chunk_bytes > 9000 );
946976
947977 int chunk_count = 0 , min_tokens = 0 , min_truncated = 0 , max_truncated = 0 ;
978+ int short_n_tokens = 0 , short_truncated = 0 ;
979+ int long_n_tokens = 0 , long_truncated = 0 ;
980+
948981 rc = get_vault_metadata (short_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
949982 ASSERT (rc == SQLITE_OK );
950983 ASSERT (chunk_count == 1 );
951984 ASSERT (min_tokens > 0 );
952985 ASSERT (min_truncated == 0 && max_truncated == 0 );
986+ short_n_tokens = min_tokens ;
987+ short_truncated = max_truncated ;
953988
954989 rc = get_vault_metadata (long_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
955990 ASSERT (rc == SQLITE_OK );
956991 ASSERT (chunk_count == 1 );
957992 ASSERT (min_tokens > 0 );
958993 ASSERT (min_truncated == 1 && max_truncated == 1 );
994+ long_n_tokens = min_tokens ;
995+ long_truncated = max_truncated ;
959996
960997 // Query for the topic that appears throughout the short reference and
961998 // only in the *tail* of the long chunk. Paraphrased so any residual FTS
@@ -984,14 +1021,16 @@ TEST(memory_search_truncation_signature) {
9841021
9851022 ASSERT (short_rank >= 0 );
9861023 if (long_rank == -1 ) {
987- printf ("(short rank=%d score=%.3f, long absent from top-10) " ,
988- short_rank , short_score );
1024+ printf ("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d absent from top-10) " ,
1025+ short_n_tokens , short_truncated , short_rank , short_score ,
1026+ long_chunk_bytes , long_n_tokens , long_truncated );
9891027 } else {
9901028 // With a fully-embedded long chunk we'd expect comparable rankings;
9911029 // truncation pushes the long chunk strictly below the short ref.
9921030 ASSERT (short_rank < long_rank );
993- printf ("(short rank=%d score=%.3f, long rank=%d score=%.3f) " ,
994- short_rank , short_score , long_rank , long_score );
1031+ printf ("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) " ,
1032+ short_n_tokens , short_truncated , short_rank , short_score ,
1033+ long_chunk_bytes , long_n_tokens , long_truncated , long_rank , long_score );
9951034 }
9961035
9971036 ASSERT_SQL_OK (db , "SELECT memory_set_option('skip_semantic', 0);" );
@@ -1021,11 +1060,13 @@ TEST(memory_search_truncation_near_model_context) {
10211060 ASSERT_SQL_OK (db , "SELECT memory_set_option('text_weight', 0.0);" );
10221061 ASSERT_SQL_OK (db , "SELECT memory_set_option('min_score', 0.0);" );
10231062
1063+ // Trailing sentence differs from the other tests' SHORT_REFs so the
1064+ // content-hash idempotency in memory_add_text doesn't collapse the insert.
10241065 static const char * SHORT_REF =
10251066 "Hydrothermal vents on the deep ocean floor sustain chemosynthetic "
10261067 "microbial ecosystems independent of sunlight. Tubeworms and "
10271068 "thermophilic archaea metabolize sulfur compounds emitted by the "
1028- "vent fluids in total darkness." ;
1069+ "vent fluids in total darkness. Near-context reference. " ;
10291070
10301071 sqlite3_stmt * stmt = NULL ;
10311072 int rc = sqlite3_prepare_v2 (db ,
@@ -1115,17 +1156,24 @@ TEST(memory_search_truncation_near_model_context) {
11151156 ASSERT (long_chunk_bytes > 18000 );
11161157
11171158 int chunk_count = 0 , min_tokens = 0 , min_truncated = 0 , max_truncated = 0 ;
1159+ int short_n_tokens = 0 , short_truncated = 0 ;
1160+ int long_n_tokens = 0 , long_truncated = 0 ;
1161+
11181162 rc = get_vault_metadata (short_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
11191163 ASSERT (rc == SQLITE_OK );
11201164 ASSERT (chunk_count == 1 );
11211165 ASSERT (min_tokens > 0 );
11221166 ASSERT (min_truncated == 0 && max_truncated == 0 );
1167+ short_n_tokens = min_tokens ;
1168+ short_truncated = max_truncated ;
11231169
11241170 rc = get_vault_metadata (long_hash , & chunk_count , & min_tokens , & min_truncated , & max_truncated );
11251171 ASSERT (rc == SQLITE_OK );
11261172 ASSERT (chunk_count == 1 );
11271173 ASSERT (min_tokens > 0 );
11281174 ASSERT (min_truncated == 1 && max_truncated == 1 );
1175+ long_n_tokens = min_tokens ;
1176+ long_truncated = max_truncated ;
11291177
11301178 rc = sqlite3_prepare_v2 (db ,
11311179 "SELECT hash, ranking FROM memory_search("
@@ -1151,12 +1199,14 @@ TEST(memory_search_truncation_near_model_context) {
11511199
11521200 ASSERT (short_rank >= 0 );
11531201 if (long_rank == -1 ) {
1154- printf ("(%d bytes; short rank=%d score=%.3f, long absent from top-10) " ,
1155- long_chunk_bytes , short_rank , short_score );
1202+ printf ("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d absent from top-10) " ,
1203+ short_n_tokens , short_truncated , short_rank , short_score ,
1204+ long_chunk_bytes , long_n_tokens , long_truncated );
11561205 } else {
11571206 ASSERT (short_rank < long_rank );
1158- printf ("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) " ,
1159- long_chunk_bytes , short_rank , short_score , long_rank , long_score );
1207+ printf ("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) " ,
1208+ short_n_tokens , short_truncated , short_rank , short_score ,
1209+ long_chunk_bytes , long_n_tokens , long_truncated , long_rank , long_score );
11601210 }
11611211
11621212 ASSERT_SQL_OK (db , "SELECT memory_set_option('skip_semantic', 0);" );
0 commit comments