From 6bda431723cf40b87ad640e40579f50f54a5a2d8 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 22 Dec 2025 14:11:01 -0800 Subject: [PATCH 1/4] Fix ArrayOfStringsSummary and add .mvn to root. --- .gitignore | 1 + .../tuple/strings/ArrayOfStringsSummary.java | 35 +++++----- .../strings/ArrayOfStringsSketchTest.java | 25 +++++-- .../ArrayOfStringsSummary_Issue699.java | 69 +++++++++++++++++++ 4 files changed, 106 insertions(+), 24 deletions(-) create mode 100644 src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java diff --git a/.gitignore b/.gitignore index f03c5078f..f0a5c0998 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ # Additional tools .clover/ +.mvn/ # OSX files **/.DS_Store diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java index 4197cd285..5008fecd5 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java @@ -36,19 +36,16 @@ */ public final class ArrayOfStringsSummary implements UpdatableSummary { - private String[] stringArr = null; + private String[] stringArr = new String[] {}; //empty string array; /** * No argument constructor. */ - ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory - stringArr = null; - } + ArrayOfStringsSummary() {} //required for ArrayOfStringsSummaryFactory //Used by copy() and in test ArrayOfStringsSummary(final String[] stringArr) { - this.stringArr = stringArr.clone(); - checkNumNodes(stringArr.length); + update(stringArr); } //used by fromMemorySegment and in test @@ -87,10 +84,20 @@ public final class ArrayOfStringsSummary implements UpdatableSummary { this.stringArr = stringArr; } + //From UpdatableSummary + + @Override + public final ArrayOfStringsSummary update(final String[] value) { + if (value == null) { stringArr = new String[] {}; } + else { stringArr = value.clone(); } + return this; + } + + //From Summary + @Override public ArrayOfStringsSummary copy() { - final ArrayOfStringsSummary nodes = new ArrayOfStringsSummary(stringArr); - return nodes; + return new ArrayOfStringsSummary(stringArr); } @Override @@ -112,16 +119,6 @@ public byte[] toByteArray() { return out; } - //From UpdatableSummary - - @Override - public ArrayOfStringsSummary update(final String[] value) { - if (stringArr == null) { - stringArr = value.clone(); - } - return this; - } - //From Object @Override @@ -139,6 +136,8 @@ public boolean equals(final Object summary) { return thisStr.equals(thatStr); } + //Local + /** * Returns the nodes array for this summary. * @return the nodes array for this summary. diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java index ef39e6b90..b5cf4a39a 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java @@ -49,22 +49,30 @@ public void checkSketch() { for (int i = 0; i < len; i++) { sketch1.update(strArrArr[i], strArrArr[i]); } + println("Sketch1"); + printSummaries(sketch1.iterator()); + sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate + println("Sketch1 updated with a duplicate"); printSummaries(sketch1.iterator()); - byte[] array = sketch1.toByteArray(); - MemorySegment wseg = MemorySegment.ofArray(array); + + MemorySegment wseg = MemorySegment.ofArray(sketch1.toByteArray()); ArrayOfStringsTupleSketch sketch2 = new ArrayOfStringsTupleSketch(wseg); + println("Sketch2 = Sketch1 via SerDe"); printSummaries(sketch2.iterator()); - checkSummaries(sketch2, sketch2); + checkSummariesEqual(sketch2, sketch2); String[] strArr3 = {"g", "h" }; sketch2.update(strArr3, strArr3); - + println("Sketch2 with a new row"); + printSummaries(sketch2.iterator()); + TupleUnion union = new TupleUnion<>(new ArrayOfStringsSummarySetOperations()); union.union(sketch1); union.union(sketch2); CompactTupleSketch csk = union.getResult(); - //printSummaries(csk.iterator()); + println("Result of union of Sketch1, Sketch2"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 4); TupleIntersection inter = @@ -72,17 +80,21 @@ public void checkSketch() { inter.intersect(sketch1); inter.intersect(sketch2); csk = inter.getResult(); + println("Intersect Sketch1, Sketch2"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 3); TupleAnotB aNotB = new TupleAnotB<>(); aNotB.setA(sketch2); aNotB.notB(sketch1); csk = aNotB.getResult(true); + println("AnotB(Sketch2, Sketch1)"); + printSummaries(csk.iterator()); assertEquals(csk.getRetainedEntries(), 1); } - private static void checkSummaries(ArrayOfStringsTupleSketch sk1, ArrayOfStringsTupleSketch sk2) { + private static void checkSummariesEqual(ArrayOfStringsTupleSketch sk1, ArrayOfStringsTupleSketch sk2) { TupleSketchIterator it1 = sk1.iterator(); TupleSketchIterator it2 = sk2.iterator(); while(it1.next() && it2.next()) { @@ -100,6 +112,7 @@ static void printSummaries(TupleSketchIterator it) { } println(""); } + println(""); } @Test diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java new file mode 100644 index 000000000..febe924f9 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java @@ -0,0 +1,69 @@ +package org.apache.datasketches.tuple.strings; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.theta.UpdatableThetaSketch; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.TupleUnion; +import org.testng.annotations.Test; + +public class ArrayOfStringsSummary_Issue699 { + UpdatableThetaSketch thetaSk = UpdatableThetaSketch.builder().build(); + ArrayOfStringsTupleSketch tupleSk = new ArrayOfStringsTupleSketch(); + TupleUnion union = new TupleUnion<>(new ArrayOfStringsSummarySetOperations()); + + @Test + void go() { + thetaSk.update("a"); + thetaSk.update("b"); + thetaSk.update("c"); + + tupleSk.update("a", new String[] {"x", "y"}); + tupleSk.update("b", new String[] {"z"}); + tupleSk.update("e", new String[] {"x", "z"}); + + println("Print Tuple Summary before union"); + printSummaries(tupleSk.iterator()); + + union.union(tupleSk); + union.union(thetaSk, new ArrayOfStringsSummary()); //enable this or the next + //union.union(thetaSk, new ArrayOfStringsSummary(new String[] {"u", "v"})); //optional association + + println("Print Tuple Summary after union"); + printSummaries(union.getResult().iterator()); + } + + @Test + void checkCopy() { + ArrayOfStringsSummary aoss = new ArrayOfStringsSummary(); + ArrayOfStringsSummary aoss2 = aoss.copy(); + } + + @Test + void checkToByteArray() { + ArrayOfStringsSummary aoss = new ArrayOfStringsSummary(); + byte[] bytes = aoss.toByteArray(); + println("byte[].length = " + bytes.length); + } + + + static void printSummaries(TupleSketchIterator it) { + while (it.next()) { + String[] strArr = it.getSummary().getValue(); + if (strArr.length == 0) { print("-"); } //illustrates an empty string array + for (String s : strArr) { + print(s + ", "); + } + println(""); + } + println(""); + } + + private static void println(Object o) { + print(o + LS); + } + + private static void print(Object o) { + //System.out.print(o.toString()); + } +} From b1d9bb80a9ef8864d7328d0efae2e858d6ba858d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 22 Dec 2025 14:27:14 -0800 Subject: [PATCH 2/4] Add maven.config to .mvn --- .mvn/maven.config | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .mvn/maven.config diff --git a/.mvn/maven.config b/.mvn/maven.config new file mode 100644 index 000000000..268ab97a0 --- /dev/null +++ b/.mvn/maven.config @@ -0,0 +1,6 @@ +# suppresses the warning: +# Direct modification of testCompileSourceRoots through add() is deprecated and will not work in Maven 4.0.0. Please use the add/remove methods instead. +# If you're using a plugin that causes this warning, please upgrade to the latest version and report an issue if the warning persists. +# To disable these warnings, set -Dmaven.project.sourceRoots.warningsDisabled=true on the command line, in the .mvn/maven.config file, +# or in project POM properties. +-Dmaven.project.sourceRoots.warningsDisabled=true From 20eeb809cf976a10f8dd03d60c1df2074e715dc6 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 22 Dec 2025 14:48:11 -0800 Subject: [PATCH 3/4] clean up test to remove unused variable. --- .../tuple/strings/ArrayOfStringsSummary_Issue699.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java index febe924f9..2c623b7c6 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java @@ -36,7 +36,7 @@ void go() { @Test void checkCopy() { ArrayOfStringsSummary aoss = new ArrayOfStringsSummary(); - ArrayOfStringsSummary aoss2 = aoss.copy(); + aoss.copy(); //if null this will throw } @Test From 58fbbf97864832be18421a48ad13dce4e3a2ea50 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 22 Dec 2025 14:59:13 -0800 Subject: [PATCH 4/4] Added license. --- .../ArrayOfStringsSummary_Issue699.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java index 2c623b7c6..7e68fc1de 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary_Issue699.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.datasketches.tuple.strings; import static org.apache.datasketches.common.Util.LS;