Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/tests/detail/test_collection_dql.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ def test_query_multivector_rrf(self, full_collection: Collection, doc_num):
)
expected_score = expected_rrf_scores[doc_id]
actual_score = doc.score
assert abs(actual_score - expected_score) < 1e-10, (
assert abs(actual_score - expected_score) < 1e-6, (
f"RRF score mismatch for document {doc_id}: expected {expected_score}, got {actual_score}"
)
assert doc.score <= prev_score, (
Expand Down Expand Up @@ -799,7 +799,7 @@ def test_query_multivector_weighted(
)
expected_score = expected_weighted_scores[doc_id]
actual_score = doc.score
assert abs(actual_score - expected_score) < 1e-10, (
assert abs(actual_score - expected_score) < 1e-6, (
f"score mismatch for document {doc_id}: expected {expected_score}, got {actual_score}"
)
assert doc.score <= prev_score, (
Expand Down
211 changes: 182 additions & 29 deletions python/tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@
InvertIndexParam,
LogLevel,
LogType,
MetricType,
OptimizeOption,
StatusCode,
Query,
VectorSchema,
)
from zvec.extension.multi_vector_reranker import (
RrfReRanker,
WeightedReRanker,
)

# ==================== Common ====================

Expand Down Expand Up @@ -60,9 +65,18 @@ def collection_schema():
dimension=128,
index_param=HnswIndexParam(),
),
VectorSchema(
"dense2",
DataType.VECTOR_FP32,
dimension=128,
index_param=HnswIndexParam(),
),
VectorSchema(
"sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
),
VectorSchema(
"sparse2", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
),
],
)

Expand All @@ -78,7 +92,12 @@ def single_doc():
return Doc(
id=f"{id}",
fields={"id": id, "name": "test", "weight": 80.0, "height": id + 140},
vectors={"dense": [id + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [id + 0.1] * 128,
"dense2": [id + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)


Expand All @@ -88,7 +107,12 @@ def multiple_docs():
Doc(
id=f"{id}",
fields={"id": id, "name": "test", "weight": 80.0, "height": 210},
vectors={"dense": [id + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [id + 0.1] * 128,
"dense2": [id + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)
for id in range(1, 101)
]
Expand Down Expand Up @@ -182,9 +206,11 @@ def test_collection_stats(self, test_collection: Collection):
assert test_collection.stats is not None
stats = test_collection.stats
assert stats.doc_count == 0
assert len(stats.index_completeness) == 2
assert len(stats.index_completeness) == 4
assert stats.index_completeness["dense"] == 1
assert stats.index_completeness["dense2"] == 1
assert stats.index_completeness["sparse"] == 1
assert stats.index_completeness["sparse2"] == 1


# ----------------------------
Expand Down Expand Up @@ -449,7 +475,12 @@ def test_collection_insert_with_nullable_false_field(self, test_collection):
"id": 1,
"name": "test",
},
vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [1 + 0.1] * 128,
"dense2": [1 + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)
result = test_collection.insert(doc)
assert bool(result)
Expand All @@ -465,7 +496,12 @@ def test_collection_insert_without_nullable_false_field(self, test_collection):
# without id, name
doc = Doc(
id="0",
vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [1 + 0.1] * 128,
"dense2": [1 + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)
with pytest.raises(ValueError) as e:
# ValueError: Invalid doc: field[id] is required but not provided
Expand All @@ -478,7 +514,12 @@ def test_collection_insert_without_nullable_false_field(self, test_collection):
fields={
"id": 1,
},
vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [1 + 0.1] * 128,
"dense2": [1 + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)
with pytest.raises(ValueError) as e:
test_collection.insert(doc)
Expand All @@ -494,7 +535,12 @@ def test_collection_insert_with_nullable_true_field(self, test_collection):
"id": 1,
"name": "test",
},
vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
vectors={
"dense": [1 + 0.1] * 128,
"dense2": [1 + 0.2] * 128,
"sparse": {1: 1.0, 2: 2.0, 3: 3.0},
"sparse2": {4: 1.5, 5: 2.5, 6: 3.5},
},
)
result = test_collection.insert(doc)
assert bool(result)
Expand Down Expand Up @@ -969,70 +1015,177 @@ def test_collection_query_by_id(
def test_collection_query_multi_vector_with_same_field(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Check that querying the same vector field twice is rejected.

    Both calls pass two ``Query`` objects that target ``dense``. Each call
    is expected to raise ``ValueError``: the first because no reranker is
    given, the second (with a reranker) because the field name repeats.
    """
    # Multi-vector query on same field without reranker should raise ValueError
    with pytest.raises(ValueError, match="Reranker is required"):
        collection_with_multiple_docs.query(
            [
                Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
                Query(field_name="dense", vector=multiple_docs[1].vector("dense")),
            ]
        )

    # Same field name with reranker should also raise ValueError
    reranker = RrfReRanker(topn=10, rank_constant=60)
    with pytest.raises(ValueError, match="appears more than once"):
        collection_with_multiple_docs.query(
            [
                Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
                Query(field_name="dense", vector=multiple_docs[1].vector("dense")),
            ],
            topk=10,
            reranker=reranker,
        )

def test_collection_query_by_dense_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Query the ``dense`` field alone and check the hit count.

    The query vector is the first fixture document's ``dense`` vector; with
    ``topk=10`` the result must hold at least one and at most ten documents.
    """
    result = collection_with_multiple_docs.query(
        Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
        topk=10,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_by_sparse_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Query the ``sparse`` field alone and check the hit count.

    The query vector is the first fixture document's ``sparse`` vector; with
    ``topk=10`` the result must hold at least one and at most ten documents.
    """
    result = collection_with_multiple_docs.query(
        Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
        topk=10,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_by_dense_vector_with_filter(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Query the ``dense`` field with a scalar filter applied.

    Uses the filter expression ``id > 50`` and then verifies that every
    returned document id, read as an integer, satisfies it.
    """
    result = collection_with_multiple_docs.query(
        Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
        topk=10,
        filter="id > 50",
    )
    assert len(result) > 0
    assert len(result) <= 10
    # Doc ids are strings, so convert before comparing with the filter bound.
    for doc in result:
        assert int(doc.id) > 50

def test_collection_query_by_sparse_vector_with_filter(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Query the ``sparse`` field with a scalar filter applied.

    Uses the filter expression ``id > 50`` and then verifies that every
    returned document id, read as an integer, satisfies it.
    """
    result = collection_with_multiple_docs.query(
        Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
        topk=10,
        filter="id > 50",
    )
    assert len(result) > 0
    assert len(result) <= 10
    # Doc ids are strings, so convert before comparing with the filter bound.
    for doc in result:
        assert int(doc.id) > 50

def test_collection_query_with_rrf_reranker_by_multi_dense_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with RRF reranker on multiple dense vectors.

    Queries ``dense`` and ``dense2`` together, fused by
    ``RrfReRanker(topn=10, rank_constant=60)``, and checks that between one
    and ten documents come back, each exposing a ``score`` attribute.
    """
    reranker = RrfReRanker(topn=10, rank_constant=60)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
            Query(field_name="dense2", vector=multiple_docs[0].vector("dense2")),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10
    # Results should have RRF-fused scores
    for doc in result:
        assert hasattr(doc, "score")

def test_collection_query_with_rrf_reranker_by_multi_sparse_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with RRF reranker on multiple sparse vectors.

    Queries ``sparse`` and ``sparse2`` together, fused by
    ``RrfReRanker(topn=10, rank_constant=60)``, and checks that between one
    and ten documents come back.
    """
    reranker = RrfReRanker(topn=10, rank_constant=60)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
            Query(
                field_name="sparse2",
                vector=multiple_docs[0].vector("sparse2"),
            ),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_with_rrf_reranker_by_hybrid_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with RRF reranker combining dense + sparse.

    Queries ``dense`` and ``sparse`` together, fused by
    ``RrfReRanker(topn=10, rank_constant=60)``, and checks that between one
    and ten documents come back.
    """
    reranker = RrfReRanker(topn=10, rank_constant=60)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
            Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_with_weighted_reranker_by_multi_dense_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with Weighted reranker on multiple dense vectors.

    Queries ``dense`` and ``dense2`` together with a ``WeightedReRanker``
    (``MetricType.IP``, weights 0.6 / 0.4 keyed by field name) and checks
    that between one and ten documents come back.
    """
    weights = {"dense": 0.6, "dense2": 0.4}
    reranker = WeightedReRanker(topn=10, metric=MetricType.IP, weights=weights)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
            Query(field_name="dense2", vector=multiple_docs[0].vector("dense2")),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_with_weighted_reranker_by_multi_sparse_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with Weighted reranker on multiple sparse vectors.

    Queries ``sparse`` and ``sparse2`` together with a ``WeightedReRanker``
    (``MetricType.IP``, weights 0.6 / 0.4 keyed by field name) and checks
    that between one and ten documents come back.
    """
    weights = {"sparse": 0.6, "sparse2": 0.4}
    reranker = WeightedReRanker(topn=10, metric=MetricType.IP, weights=weights)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
            Query(
                field_name="sparse2",
                vector=multiple_docs[0].vector("sparse2"),
            ),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10

def test_collection_query_with_weighted_reranker_by_hybrid_vector(
    self, collection_with_multiple_docs: Collection, multiple_docs
):
    """Test multi-vector query with Weighted reranker combining dense + sparse.

    Queries ``dense`` and ``sparse`` together with a ``WeightedReRanker``
    (``MetricType.IP``, weights 0.7 / 0.3 keyed by field name) and checks
    that between one and ten documents come back.
    """
    weights = {"dense": 0.7, "sparse": 0.3}
    reranker = WeightedReRanker(topn=10, metric=MetricType.IP, weights=weights)
    result = collection_with_multiple_docs.query(
        [
            Query(field_name="dense", vector=multiple_docs[0].vector("dense")),
            Query(field_name="sparse", vector=multiple_docs[0].vector("sparse")),
        ],
        topk=10,
        reranker=reranker,
    )
    assert len(result) > 0
    assert len(result) <= 10
Loading
Loading