436 lines
14 KiB
Python
436 lines
14 KiB
Python
|
"""Tests for document querying"""
|
|||
|
|
|||
|
from datetime import datetime, timedelta, timezone
|
|||
|
|
|||
|
from typing import Literal
|
|||
|
|
|||
|
import pytest
|
|||
|
|
|||
|
from earthsnake.document.es4 import Es4Document
|
|||
|
from earthsnake.exc import ValidationError
|
|||
|
from earthsnake.identity import Identity
|
|||
|
from earthsnake.path import Path
|
|||
|
from earthsnake.query import (
|
|||
|
Cmp,
|
|||
|
HistoryMode,
|
|||
|
OrderByField,
|
|||
|
OrderDirection,
|
|||
|
Query,
|
|||
|
QueryFilter,
|
|||
|
QueryVerdict,
|
|||
|
)
|
|||
|
|
|||
|
|
|||
|
def test_init_negative_limit() -> None:
    """A negative ``limit`` must be rejected when the query is constructed"""

    expected_message = 'query limit must be 0 or greater'

    with pytest.raises(ValidationError) as exc_info:
        Query(limit=-1)

    # The whole message is compared, not just a fragment of it.
    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_mutual_exclusive_start_after() -> None:
    """Giving both ``start_after_local_index`` and ``start_after_path`` must be rejected"""

    expected_message = (
        'query start position should be either a local_index or a path, but not both'
    )

    with pytest.raises(ValidationError) as exc_info:
        Query(start_after_local_index=1, start_after_path=Path('/test.txt'))

    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_order_start_after_mismatch_path() -> None:
    """Ordering by path while starting after a local index must be rejected"""

    expected_message = 'query would order by path, but instructed to start at a local index'

    with pytest.raises(ValidationError) as exc_info:
        Query(start_after_local_index=1, order_by_field=OrderByField.PATH)

    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_order_start_after_mismatch_local_index() -> None:
    """Ordering by local index while starting after a path must be rejected"""

    expected_message = 'query would order by local index, but instructed to start at a path'

    with pytest.raises(ValidationError) as exc_info:
        Query(start_after_path=Path('/test.txt'), order_by_field=OrderByField.LOCAL_INDEX)

    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_path_startswith_mismatch() -> None:
    """An exact path combined with a prefix that the path does not start with must be rejected"""

    expected_message = (
        'we were asked to match an exact path and a path prefix but they don’t match each other'
    )

    with pytest.raises(ValidationError) as exc_info:
        # '/test.txt' does not start with '/other.md', so the filter contradicts itself.
        Query(flt=QueryFilter(path=Path('/test.txt'), path_prefix='/other.md'))

    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_path_endswith_mismatch() -> None:
    """An exact path combined with a suffix that the path does not end with must be rejected"""

    expected_message = (
        'we were asked to match an exact path and a path suffix but they don’t match each other'
    )

    with pytest.raises(ValidationError) as exc_info:
        # '/test.txt' does not end with '.md', so the filter contradicts itself.
        Query(flt=QueryFilter(path=Path('/test.txt'), path_suffix='.md'))

    assert str(exc_info.value) == expected_message
|
|||
|
|
|||
|
|
|||
|
def test_init_empty_prediction() -> None:
    """A query limited to zero documents must predict ``QueryVerdict.NOTHING``"""

    assert Query(limit=0).prediction == QueryVerdict.NOTHING
|
|||
|
|
|||
|
|
|||
|
def test_init_filter_prediction() -> None:
    """A query that carries a filter must predict ``QueryVerdict.SOME``"""

    path_filter = QueryFilter(path=Path('/test.txt'))

    assert Query(flt=path_filter).prediction == QueryVerdict.SOME
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize(
    'history_mode,prediction',
    (
        pytest.param(HistoryMode.ALL, QueryVerdict.ALL),
        pytest.param(HistoryMode.LATEST, QueryVerdict.ALL_LATEST),
    ),
)
def test_init_all_prediction(history_mode: HistoryMode, prediction: QueryVerdict) -> None:
    """Without any filtering parameter the prediction must follow the history mode

    ``HistoryMode.ALL`` is paired with ``QueryVerdict.ALL`` and ``HistoryMode.LATEST`` with
    ``QueryVerdict.ALL_LATEST``.
    """

    assert Query(history_mode=history_mode).prediction == prediction
|
|||
|
|
|||
|
|
|||
|
def test_match_doc_no_filter(es4_document: Es4Document) -> None:
    """A query without any filter must match the document"""

    # ``is True`` is deliberate: the result has to be the boolean itself, not merely truthy.
    assert Query().match_document(es4_document) is True
|
|||
|
|
|||
|
|
|||
|
def test_match_doc_path(es4_document: Es4Document) -> None:
    """An exact ``path`` filter must match only a document stored at that very path"""

    query = Query(flt=QueryFilter(path=Path('/test.txt')))

    # Move the same document to each path in turn and check the verdict.
    for doc_path, should_match in (('/test.txt', True), ('/test.md', False)):
        es4_document.path = Path(doc_path)

        assert bool(query.match_document(es4_document)) is should_match
|
|||
|
|
|||
|
|
|||
|
def test_match_doc_path_prefix(es4_document: Es4Document) -> None:
    """A ``path_prefix`` filter must match only documents whose path starts with the prefix"""

    query = Query(flt=QueryFilter(path_prefix='/test'))

    # Move the same document to each path in turn and check the verdict.
    for doc_path, should_match in (('/test.txt', True), ('/other.txt', False)):
        es4_document.path = Path(doc_path)

        assert bool(query.match_document(es4_document)) is should_match
|
|||
|
|
|||
|
|
|||
|
def test_match_doc_path_suffix(es4_document: Es4Document) -> None:
    """A ``path_suffix`` filter must match only documents whose path ends with the suffix"""

    query = Query(flt=QueryFilter(path_suffix='.txt'))

    # Move the same document to each path in turn and check the verdict.
    for doc_path, should_match in (('/test.txt', True), ('/test.md', False)):
        es4_document.path = Path(doc_path)

        assert bool(query.match_document(es4_document)) is should_match
|
|||
|
|
|||
|
|
|||
|
def test_match_doc_author(es4_document: Es4Document, identity: Identity) -> None:
    """An ``author`` filter must match only documents written by that author"""

    author_filter = QueryFilter(author=identity)
    query = Query(flt=author_filter)

    # Authored by the identity the filter asks for: must match.
    es4_document.author = identity
    assert query.match_document(es4_document)

    # Authored by a newly generated identity: expected not to match.
    stranger = Identity.generate('name')
    es4_document.author = stranger
    assert not query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize('cmp_usage', ('explicit', 'implicit'))
def test_match_timestamp_eq(
    cmp_usage: Literal['explicit', 'implicit'],
    es4_document: Es4Document,
) -> None:
    """A ``timestamp`` filter with the EQ comparator must match only the exact timestamp

    The ``implicit`` variant leaves ``timestamp_comp`` unset and the ``explicit`` variant sets
    it to ``Cmp.EQ``; both are expected to behave the same.
    """

    # NOTE(review): presumably the timestamp of the ``es4_document`` fixture - confirm in conftest.
    moment = datetime(2022, 5, 5, 8, 21, 11, 668993, tzinfo=timezone.utc)
    timestamp_filter = QueryFilter(timestamp=moment)

    if cmp_usage == 'explicit':
        timestamp_filter['timestamp_comp'] = Cmp.EQ

    query = Query(flt=timestamp_filter)
    assert query.match_document(es4_document)

    # Any other timestamp must no longer be considered equal.
    es4_document.timestamp += timedelta(days=1)
    assert not query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
def test_match_timestamp_gt(es4_document: Es4Document) -> None:
    """A ``timestamp`` filter with the GT comparator must match only later documents"""

    # NOTE(review): presumably one microsecond before the fixture's timestamp - confirm.
    threshold = datetime(2022, 5, 5, 8, 21, 11, 668992, tzinfo=timezone.utc)
    query = Query(flt=QueryFilter(timestamp=threshold, timestamp_comp=Cmp.GT))

    assert query.match_document(es4_document)

    # One microsecond earlier the document is expected to stop matching.
    es4_document.timestamp -= timedelta(microseconds=1)
    assert not query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
def test_match_timestamp_lt(es4_document: Es4Document) -> None:
    """A ``timestamp`` filter with the LT comparator must match only earlier documents"""

    # NOTE(review): presumably one microsecond after the fixture's timestamp - confirm.
    threshold = datetime(2022, 5, 5, 8, 21, 11, 668994, tzinfo=timezone.utc)
    query = Query(flt=QueryFilter(timestamp=threshold, timestamp_comp=Cmp.LT))

    assert query.match_document(es4_document)

    # One microsecond later the document is expected to stop matching.
    es4_document.timestamp += timedelta(microseconds=1)
    assert not query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize('cmp_usage', ('explicit', 'implicit'))
def test_match_content_length_eq(
    cmp_usage: Literal['explicit', 'implicit'],
    es4_document: Es4Document,
) -> None:
    """A ``content_length`` filter with the EQ comparator must match only that exact length

    The ``implicit`` variant leaves ``content_length_comp`` unset and the ``explicit`` variant
    sets it to ``Cmp.EQ``; both are expected to behave the same.
    """

    # NOTE(review): presumably the fixture's content is 4 characters long - confirm in conftest.
    length_filter = QueryFilter(content_length=4)

    if cmp_usage == 'explicit':
        length_filter['content_length_comp'] = Cmp.EQ

    query = Query(flt=length_filter)
    assert query.match_document(es4_document)

    # 'other' has five characters, so the length no longer equals 4.
    es4_document.content = 'other'
    assert not query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
def test_match_content_length_gt(es4_document: Es4Document) -> None:
    """A ``content_length`` filter with the GT comparator must match only longer content"""

    query = Query(flt=QueryFilter(content_length=4, content_length_comp=Cmp.GT))

    # The fixture's own content is expected not to be longer than 4.
    assert not query.match_document(es4_document)

    # 'other' has five characters, which is greater than 4.
    es4_document.content = 'other'
    assert query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
def test_match_content_length_lt(es4_document: Es4Document) -> None:
    """A ``content_length`` filter with the LT comparator must match only shorter content"""

    query = Query(flt=QueryFilter(content_length=4, content_length_comp=Cmp.LT))

    # The fixture's own content is expected not to be shorter than 4.
    assert not query.match_document(es4_document)

    # 'see' has three characters, which is less than 4.
    es4_document.content = 'see'
    assert query.match_document(es4_document)
|
|||
|
|
|||
|
|
|||
|
def test_query_filter_nothing_prediction(es4_document: Es4Document) -> None:
    """Running a query limited to zero documents must give an empty result"""

    zero_limit_query = Query(limit=0)

    result = zero_limit_query({es4_document: {}})

    assert not result
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize('cmp_usage', ('implicit', 'explicit'))
def test_query_filter_path_sort(
    cmp_usage: Literal['implicit', 'explicit'],
    es4_document: Es4Document,
    identity: Identity,
) -> None:
    """Ordering by path must return the documents in ascending path order

    The ``implicit`` variant leaves ``order_direction`` unset and the ``explicit`` variant sets
    it to ``OrderDirection.ASC``; both are expected to produce the same order.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    query = (
        Query(order_by_field=OrderByField.PATH)
        if cmp_usage == 'implicit'
        else Query(order_by_field=OrderByField.PATH, order_direction=OrderDirection.ASC)
    )

    # The fixture document is inserted first but is expected to sort after '/much.txt'.
    documents = {es4_document: {}, other_document: {}}

    assert query(documents) == [other_document, es4_document]
|
|||
|
|
|||
|
|
|||
|
def test_query_filter_path_sort_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Ordering by path in descending direction must return the documents in reverse path order"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.PATH,
        order_direction=OrderDirection.DESC,
    )

    # '/much.txt' is inserted first but is expected to come last when sorting descending.
    documents = {other_document: {}, es4_document: {}}

    assert query(documents) == [es4_document, other_document]
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize('cmp_usage', ('implicit', 'explicit'))
def test_query_filter_local_index_sort(
    cmp_usage: Literal['implicit', 'explicit'],
    es4_document: Es4Document,
    identity: Identity,
) -> None:
    """Ordering by local index must return the documents in ascending local index order

    The ``implicit`` variant leaves ``order_direction`` unset and the ``explicit`` variant sets
    it to ``OrderDirection.ASC``; both are expected to produce the same order.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    query = (
        Query(order_by_field=OrderByField.LOCAL_INDEX)
        if cmp_usage == 'implicit'
        else Query(order_by_field=OrderByField.LOCAL_INDEX, order_direction=OrderDirection.ASC)
    )

    # The document with the higher local index is inserted first on purpose.
    documents = {es4_document: {'local_index': 2}, other_document: {'local_index': 1}}

    assert query(documents) == [other_document, es4_document]
|
|||
|
|
|||
|
|
|||
|
def test_query_filter_local_index_sort_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Ordering by local index in descending direction must return the highest index first"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.LOCAL_INDEX,
        order_direction=OrderDirection.DESC,
    )

    # The document with the lower local index is inserted first on purpose.
    documents = {other_document: {'local_index': 1}, es4_document: {'local_index': 2}}

    assert query(documents) == [es4_document, other_document]
|
|||
|
|
|||
|
|
|||
|
def test_start_after_path_asc(es4_document: Es4Document, identity: Identity) -> None:
    """With ascending path order, ``start_after_path`` must skip up to and including that path"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.PATH,
        start_after_path=Path('/much.txt'),
    )

    documents = {other_document: {}, es4_document: {}}

    # Only the fixture document is expected to sort after '/much.txt'.
    assert query(documents) == [es4_document]
|
|||
|
|
|||
|
|
|||
|
def test_start_after_path_desc(es4_document: Es4Document, identity: Identity) -> None:
    """With descending path order, ``start_after_path`` must skip up to and including that path"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    descending_query = Query(
        order_by_field=OrderByField.PATH,
        order_direction=OrderDirection.DESC,
        start_after_path=Path('/test.txt'),
    )

    documents = {other_document: {}, es4_document: {}}

    # In descending order only '/much.txt' is expected to come after '/test.txt'.
    assert descending_query(documents) == [other_document]
|
|||
|
|
|||
|
|
|||
|
def test_start_after_local_index_asc(es4_document: Es4Document, identity: Identity) -> None:
    """With ascending local index order, ``start_after_local_index`` must skip up to that index"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.LOCAL_INDEX,
        start_after_local_index=1,
    )

    documents = {other_document: {'local_index': 2}, es4_document: {'local_index': 1}}

    # Only the document with local index 2 lies after index 1.
    assert query(documents) == [other_document]
|
|||
|
|
|||
|
|
|||
|
def test_start_after_local_index_desc(es4_document: Es4Document, identity: Identity) -> None:
    """With descending local index order, ``start_after_local_index`` must skip up to that index"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    descending_query = Query(
        order_by_field=OrderByField.LOCAL_INDEX,
        order_direction=OrderDirection.DESC,
        start_after_local_index=2,
    )

    documents = {other_document: {'local_index': 1}, es4_document: {'local_index': 2}}

    # In descending order only the document with local index 1 comes after index 2.
    assert descending_query(documents) == [other_document]
|
|||
|
|
|||
|
|
|||
|
def test_start_after_with_empty_output() -> None:
    """A query with ``start_after_path`` must cope with an empty document collection"""

    query = Query(start_after_path=Path('/test.txt'))

    result = query({})

    assert result == []
|
|||
|
|
|||
|
|
|||
|
@pytest.mark.parametrize(
    'use_start_after',
    (True, False),
    ids=('with_start_after', 'without_start_after'),
)
def test_limit(use_start_after: bool, es4_document: Es4Document, identity: Identity) -> None:
    """Test if document count limiting works

    With ``limit=1`` exactly one document must be returned: the first one in the default order,
    or the first one after ``start_after_path`` when a start position is given.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    if use_start_after:
        # Starting after '/much.txt' leaves the fixture document as the first candidate.
        query = Query(start_after_path=Path('/much.txt'), limit=1)
        expected = es4_document
    else:
        # Without a start position '/much.txt' is expected to come first.
        query = Query(limit=1)
        expected = other_document

    assert query({es4_document: {}, other_document: {}}) == [expected]
|
|||
|
|
|||
|
|
|||
|
def test_byte_limiting_not_reached(es4_document: Es4Document, identity: Identity) -> None:
    """``limit_bytes`` must not drop anything while the documents stay below the limit"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'), content='much')
    roomy_query = Query(limit_bytes=15)

    documents = {es4_document: {}, other_document: {}}

    # 15 bytes is expected to be enough for both documents, so both come back.
    assert roomy_query(documents) == [other_document, es4_document]
|
|||
|
|
|||
|
|
|||
|
def test_byte_limiting(es4_document: Es4Document, identity: Identity) -> None:
    """``limit_bytes`` must cut the result off once the byte budget is used up"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'), content='much')
    tight_query = Query(limit_bytes=5)

    documents = {es4_document: {}, other_document: {}}

    # The 5 byte budget fits 'much' (4 characters); the fixture document is expected to be cut.
    assert tight_query(documents) == [other_document]
|
|||
|
|
|||
|
|
|||
|
def test_byte_limiting_with_start_after(es4_document: Es4Document, identity: Identity) -> None:
    """Test if byte limiting works as expected when not starting from the beginning

    Documents skipped by ``start_after_path`` must not count against the byte budget: only
    the first document after the start position is expected to fit into the 5 byte limit.
    """

    other_document1 = Es4Document(identity, es4_document.share, Path('/much1.txt'), content='much1')
    other_document2 = Es4Document(identity, es4_document.share, Path('/much2.txt'), content='much2')

    # The start position is a ``Path`` object, like in every other ``start_after_path`` test.
    query = Query(limit_bytes=5, start_after_path=Path('/much1.txt'))

    assert query({es4_document: {}, other_document1: {}, other_document2: {}}) == [other_document2]
|