From fe1970d66cbee7838387dd979ed440fbefad9201 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 30 Jan 2026 16:02:33 +0100 Subject: [PATCH 1/4] Fix - use column index to store decoded data in RNtuple It is easier to process it --- modules/rntuple.mjs | 68 +++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 49 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 939647eb6..691f80320 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -925,24 +925,23 @@ async function readHeaderFooter(tuple) { function readEntry(rntuple, fieldName, entryIndex) { const builder = rntuple.builder, field = builder.fieldDescriptors.find(f => f.fieldName === fieldName), - fieldData = rntuple._clusterData[fieldName]; + columns = rntuple.fieldToColumns[fieldName]; if (!field) throw new Error(`No descriptor for field ${fieldName}`); - if (!fieldData) - throw new Error(`No data for field ${fieldName}`); + if (!columns) + throw new Error(`No columns field ${fieldName}`); - // Detect and decode string fields - if (Array.isArray(fieldData) && fieldData.length === 2) { - const [offsets, payload] = fieldData, + if (field.typeName === 'std::string') { + // string extracted from two columns + const offsets = rntuple._clusterData[columns[0].index][0], + payload = rntuple._clusterData[columns[1].index][0], start = entryIndex === 0 ? 0 : Number(offsets[entryIndex - 1]), - end = Number(offsets[entryIndex]), - decoded = payload.slice(start, end).join(''); // Convert to string - return decoded; + end = Number(offsets[entryIndex]); + return payload.slice(start, end).join(''); // Convert to string } - - // Fallback: primitive type (e.g. 
int, float) - return fieldData[0][entryIndex]; + const values = rntuple._clusterData[columns[0].index]; + return values[0][entryIndex]; } /** @summary Return field name for specified branch index @@ -1040,7 +1039,7 @@ function readNextCluster(rntuple, selector) { }); return Promise.all(unzipPromises).then(unzipBlobs => { - rntuple._clusterData = {}; // store deserialized data per field + rntuple._clusterData = {}; // store deserialized data per column index for (let i = 0; i < unzipBlobs.length; ++i) { const blob = unzipBlobs[i]; @@ -1051,54 +1050,25 @@ function readNextCluster(rntuple, selector) { page, colDesc } = pages[i], - field = builder.fieldDescriptors[colDesc.fieldId], - values = builder.deserializePage(blob, colDesc, page); + field = builder.fieldDescriptors[colDesc.fieldId], + values = builder.deserializePage(blob, colDesc, page); // Support multiple representations (e.g., string fields with offsets + payload) - if (!rntuple._clusterData[field.fieldName]) - rntuple._clusterData[field.fieldName] = []; - - // splitting string fields into offset and payload components - if (field.typeName === 'std::string') { - if ( - colDesc.coltype === ENTupleColumnType.kIndex64 || - colDesc.coltype === ENTupleColumnType.kIndex32 || - colDesc.coltype === ENTupleColumnType.kSplitIndex64 || - colDesc.coltype === ENTupleColumnType.kSplitIndex32 - ) // Index64/Index32 - rntuple._clusterData[field.fieldName][0] = values; // Offsets - else if (colDesc.coltype === ENTupleColumnType.kChar) - rntuple._clusterData[field.fieldName][1] = values; // Payload - else - throw new Error(`Unsupported column type for string field: ${colDesc.coltype}`); - } else - rntuple._clusterData[field.fieldName][0] = values; - } + if (!rntuple._clusterData[colDesc.index]) + rntuple._clusterData[colDesc.index] = []; - // Ensure string fields have ending offset for proper reconstruction of the last entry - for (const fieldName of selectedFields) { - const field = builder.fieldDescriptors.find(f => 
f.fieldName === fieldName), - colData = rntuple._clusterData[fieldName]; - if (field.typeName === 'std::string') { - if (!Array.isArray(colData) || colData.length !== 2) - throw new Error(`String field '${fieldName}' must have 2 columns`); - if (colData[0].length !== builder.clusterSummaries[clusterIndex].numEntries) - throw new Error(`Malformed string field '${fieldName}': missing final offset`); - } + rntuple._clusterData[colDesc.index].push(values); } const numEntries = clusterSummary.numEntries; for (let i = 0; i < numEntries; ++i) { for (let b = 0; b < selector.numBranches(); ++b) { const fieldName = getSelectorFieldName(selector, b), - tgtName = selector.nameOfBranch(b), - values = rntuple._clusterData[fieldName]; + tgtName = selector.nameOfBranch(b); - if (!values) - throw new Error(`Missing values for selected field: ${fieldName}`); selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i); } - selector.Process(); + selector.Process(i); } selector.Terminate(true); From 7eb59d0fb7b7e8dce04bdbabf9b32bca58cf4fc3 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 30 Jan 2026 16:13:14 +0100 Subject: [PATCH 2/4] Fix - correctly use pages in RNtuple When each column in the cluster is split into pages, one needs to correctly detect the page from which values should be extracted --- modules/rntuple.mjs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 691f80320..42a655edb 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -922,7 +922,7 @@ async function readHeaderFooter(tuple) { }); } -function readEntry(rntuple, fieldName, entryIndex) { +function readEntry(rntuple, fieldName, clusterIndex, entryIndex) { const builder = rntuple.builder, field = builder.fieldDescriptors.find(f => f.fieldName === fieldName), columns = rntuple.fieldToColumns[fieldName]; @@ -932,16 +932,27 @@ function readEntry(rntuple, fieldName, entryIndex) { if (!columns) throw new Error(`No columns 
field ${fieldName}`); + + const pages = builder.pageLocations[clusterIndex]?.[columns[0].index]?.pages; + if (!pages) + throw new Error(`No pages found ${fieldName}`); + + let pageid = 0; + while ((pageid < pages.length - 1) && (entryIndex >= Number(pages[pageid].numElements))) { + entryIndex -= Number(pages[pageid].numElements); + pageid++; + } + if (field.typeName === 'std::string') { // string extracted from two columns - const offsets = rntuple._clusterData[columns[0].index][0], - payload = rntuple._clusterData[columns[1].index][0], + const offsets = rntuple._clusterData[columns[0].index][pageid], + payload = rntuple._clusterData[columns[1].index][pageid], start = entryIndex === 0 ? 0 : Number(offsets[entryIndex - 1]), end = Number(offsets[entryIndex]); return payload.slice(start, end).join(''); // Convert to string } const values = rntuple._clusterData[columns[0].index]; - return values[0][entryIndex]; + return values[pageid][entryIndex]; } /** @summary Return field name for specified branch index @@ -1066,7 +1077,7 @@ function readNextCluster(rntuple, selector) { const fieldName = getSelectorFieldName(selector, b), tgtName = selector.nameOfBranch(b); - selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i); + selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, clusterIndex, i); } selector.Process(i); } From 9ec656d38b35c949b9d6dfd578239da37c0e2524 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 30 Jan 2026 16:19:11 +0100 Subject: [PATCH 3/4] Fix - process all clusters in RNtuple --- modules/rntuple.mjs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 42a655edb..5986bd5a1 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -963,11 +963,11 @@ function getSelectorFieldName(selector, i) { } // Read and process the next data cluster from the RNTuple -function readNextCluster(rntuple, selector) { +async function readNextCluster(rntuple, selector) { const 
builder = rntuple.builder; // Add validation - if (!builder.clusterSummaries || builder.clusterSummaries.length === 0) + if (!builder.clusterSummaries) throw new Error('No cluster summaries available - possibly incomplete file reading'); const clusterIndex = selector.currentCluster, @@ -977,6 +977,11 @@ function readNextCluster(rntuple, selector) { // Collect only selected field names from selector selectedFields = []; + if (!clusterSummary) { + selector.Terminate(clusterIndex > 0); + return false; + } + for (let i = 0; i < selector.numBranches(); ++i) selectedFields.push(getSelectorFieldName(selector, i)); @@ -1003,7 +1008,7 @@ function readNextCluster(rntuple, selector) { // Early exit if no pages to read (i.e., no selected fields matched) if (pages.length === 0) { selector.Terminate(false); - return Promise.resolve(); + return false; } // Build flat array of [offset, size, offset, size, ...] to read pages @@ -1079,10 +1084,10 @@ function readNextCluster(rntuple, selector) { selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, clusterIndex, i); } - selector.Process(i); + selector.Process(selector.currentEntry++); } - selector.Terminate(true); + return readNextCluster(rntuple, selector); }); }); } @@ -1093,6 +1098,7 @@ function rntupleProcess(rntuple, selector, args) { return readHeaderFooter(rntuple).then(() => { selector.Begin(); selector.currentCluster = 0; + selector.currentEntry = 0; return readNextCluster(rntuple, selector, args); }).then(() => selector); } From 8dbd7a6a449f30da0c5279c61d26bcf961037e6c Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 30 Jan 2026 16:31:46 +0100 Subject: [PATCH 4/4] Adjust syntax in RNtuple --- modules/rntuple.mjs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 5986bd5a1..fe29976ec 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1062,12 +1062,8 @@ async function readNextCluster(rntuple, selector) { // Ensure blob is a DataView if 
(!(blob instanceof DataView)) throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(blob)}`); - const { - page, - colDesc - } = pages[i], - field = builder.fieldDescriptors[colDesc.fieldId], - values = builder.deserializePage(blob, colDesc, page); + const colDesc = pages[i].colDesc, + values = builder.deserializePage(blob, colDesc, pages[i].page); // Support multiple representations (e.g., string fields with offsets + payload) if (!rntuple._clusterData[colDesc.index])