earthsnake/tests/test_query.py

436 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Tests for document querying"""
from datetime import datetime, timedelta, timezone
from typing import Literal
import pytest
from earthsnake.document.es4 import Es4Document
from earthsnake.exc import ValidationError
from earthsnake.identity import Identity
from earthsnake.path import Path
from earthsnake.query import (
Cmp,
HistoryMode,
OrderByField,
OrderDirection,
Query,
QueryFilter,
QueryVerdict,
)
def test_init_negative_limit() -> None:
    """A negative ``limit`` must be rejected when the query is constructed"""

    expected_message = 'query limit must be 0 or greater'

    with pytest.raises(ValidationError) as raised:
        Query(limit=-1)

    assert str(raised.value) == expected_message
def test_init_mutual_exclusive_start_after() -> None:
    """Passing both ``start_after_local_index`` and ``start_after_path`` must be rejected"""

    expected_message = (
        'query start position should be either a local_index or a path, but not both'
    )

    with pytest.raises(ValidationError) as raised:
        Query(start_after_local_index=1, start_after_path=Path('/test.txt'))

    assert str(raised.value) == expected_message
def test_init_order_start_after_mismatch_path() -> None:
    """Ordering by path while starting after a local index must be rejected"""

    expected_message = 'query would order by path, but instructed to start at a local index'

    with pytest.raises(ValidationError) as raised:
        Query(
            start_after_local_index=1,
            order_by_field=OrderByField.PATH,
        )

    assert str(raised.value) == expected_message
def test_init_order_start_after_mismatch_local_index() -> None:
    """Ordering by local index while starting after a path must be rejected"""

    expected_message = 'query would order by local index, but instructed to start at a path'

    with pytest.raises(ValidationError) as raised:
        Query(
            start_after_path=Path('/test.txt'),
            order_by_field=OrderByField.LOCAL_INDEX,
        )

    assert str(raised.value) == expected_message
def test_init_path_startswith_mismatch() -> None:
    """An exact path combined with a prefix that doesn't match that path must be rejected"""

    expected_message = (
        'we were asked to match an exact path and a path prefix but they dont match each other'
    )

    # The filter is built inside the ``with`` block so any error raised while creating it is
    # captured as well
    with pytest.raises(ValidationError) as raised:
        Query(
            flt=QueryFilter(
                path=Path('/test.txt'),
                path_prefix='/other.md',
            )
        )

    assert str(raised.value) == expected_message
def test_init_path_endswith_mismatch() -> None:
    """An exact path combined with a suffix that doesn't match that path must be rejected"""

    expected_message = (
        'we were asked to match an exact path and a path suffix but they dont match each other'
    )

    # The filter is built inside the ``with`` block so any error raised while creating it is
    # captured as well
    with pytest.raises(ValidationError) as raised:
        Query(
            flt=QueryFilter(
                path=Path('/test.txt'),
                path_suffix='.md',
            )
        )

    assert str(raised.value) == expected_message
def test_init_empty_prediction() -> None:
    """A query limited to zero documents must predict ``QueryVerdict.NOTHING``"""

    assert Query(limit=0).prediction == QueryVerdict.NOTHING
def test_init_filter_prediction() -> None:
    """A query with a filter set must predict ``QueryVerdict.SOME``"""

    path_filter = QueryFilter(path=Path('/test.txt'))
    filtered_query = Query(flt=path_filter)

    assert filtered_query.prediction == QueryVerdict.SOME
@pytest.mark.parametrize(
    'history_mode,prediction',
    [
        (HistoryMode.ALL, QueryVerdict.ALL),
        (HistoryMode.LATEST, QueryVerdict.ALL_LATEST),
    ],
)
def test_init_all_prediction(history_mode: HistoryMode, prediction: QueryVerdict) -> None:
    """Without filtering parameters the verdict must be ALL or ALL_LATEST

    Which of the two is expected depends on the history mode the query is created with.
    """

    unfiltered_query = Query(history_mode=history_mode)

    assert unfiltered_query.prediction == prediction
def test_match_doc_no_filter(es4_document: Es4Document) -> None:
    """Test if a query without any filter matches a document"""

    matched = Query().match_document(es4_document)

    # The identity check is deliberate: the result must be the ``True`` singleton itself
    assert matched is True
def test_match_doc_path(es4_document: Es4Document) -> None:
    """Test if the exact path filter works correctly"""

    query = Query(flt=QueryFilter(path=Path('/test.txt')))

    # First the matching path, then a path that differs only in its extension
    for document_path, should_match in (('/test.txt', True), ('/test.md', False)):
        es4_document.path = Path(document_path)

        assert bool(query.match_document(es4_document)) is should_match
def test_match_doc_path_prefix(es4_document: Es4Document) -> None:
    """Test if filtering on the beginning of the path works correctly"""

    query = Query(flt=QueryFilter(path_prefix='/test'))

    # First a path starting with the prefix, then one that does not
    for document_path, should_match in (('/test.txt', True), ('/other.txt', False)):
        es4_document.path = Path(document_path)

        assert bool(query.match_document(es4_document)) is should_match
def test_match_doc_path_suffix(es4_document: Es4Document) -> None:
    """Test if filtering on the end of the path works correctly"""

    query = Query(flt=QueryFilter(path_suffix='.txt'))

    # First a path ending with the suffix, then one that does not
    for document_path, should_match in (('/test.txt', True), ('/test.md', False)):
        es4_document.path = Path(document_path)

        assert bool(query.match_document(es4_document)) is should_match
def test_match_doc_author(es4_document: Es4Document, identity: Identity) -> None:
    """Test if filtering on the document author works correctly"""

    author_query = Query(flt=QueryFilter(author=identity))

    # A document written by the filtered identity must match
    es4_document.author = identity
    assert author_query.match_document(es4_document)

    # A document written by a freshly generated identity must not match
    stranger = Identity.generate('name')
    es4_document.author = stranger
    assert not author_query.match_document(es4_document)
@pytest.mark.parametrize('cmp_usage', ('explicit', 'implicit'))
def test_match_timestamp_eq(
    cmp_usage: Literal['explicit', 'implicit'],
    es4_document: Es4Document,
) -> None:
    """Test if the timestamp filter works correctly with the EQ comparator

    The comparator is either set explicitly or left out of the filter, depending on
    ``cmp_usage``; the test expects the same outcome in both cases.
    """

    # NOTE(review): the ``es4_document`` fixture presumably carries exactly this timestamp
    wanted_timestamp = datetime(2022, 5, 5, 8, 21, 11, 668993, tzinfo=timezone.utc)
    timestamp_filter = QueryFilter(timestamp=wanted_timestamp)

    if cmp_usage == 'explicit':
        timestamp_filter['timestamp_comp'] = Cmp.EQ

    query = Query(flt=timestamp_filter)

    assert query.match_document(es4_document)

    # Moving the document a day forward must break the equality
    one_day = timedelta(days=1)
    es4_document.timestamp += one_day

    assert not query.match_document(es4_document)
def test_match_timestamp_gt(es4_document: Es4Document) -> None:
    """Test if the timestamp filter works correctly with the GT comparator"""

    # NOTE(review): presumably one microsecond before the fixture document's timestamp
    threshold = datetime(2022, 5, 5, 8, 21, 11, 668992, tzinfo=timezone.utc)
    query = Query(flt=QueryFilter(timestamp=threshold, timestamp_comp=Cmp.GT))

    assert query.match_document(es4_document)

    # One microsecond earlier, the document must no longer match
    one_microsecond = timedelta(microseconds=1)
    es4_document.timestamp -= one_microsecond

    assert not query.match_document(es4_document)
def test_match_timestamp_lt(es4_document: Es4Document) -> None:
    """Test if the timestamp filter works correctly with the LT comparator"""

    # NOTE(review): presumably one microsecond after the fixture document's timestamp
    threshold = datetime(2022, 5, 5, 8, 21, 11, 668994, tzinfo=timezone.utc)
    query = Query(flt=QueryFilter(timestamp=threshold, timestamp_comp=Cmp.LT))

    assert query.match_document(es4_document)

    # One microsecond later, the document must no longer match
    one_microsecond = timedelta(microseconds=1)
    es4_document.timestamp += one_microsecond

    assert not query.match_document(es4_document)
@pytest.mark.parametrize('cmp_usage', ('explicit', 'implicit'))
def test_match_content_length_eq(
    cmp_usage: Literal['explicit', 'implicit'],
    es4_document: Es4Document,
) -> None:
    """Test if the content_length filter works correctly with the EQ comparator

    The comparator is either set explicitly or left out of the filter, depending on
    ``cmp_usage``; the test expects the same outcome in both cases.
    """

    # NOTE(review): the fixture document's content is presumably 4 units long
    length_filter = QueryFilter(content_length=4)

    if cmp_usage == 'explicit':
        length_filter['content_length_comp'] = Cmp.EQ

    query = Query(flt=length_filter)

    assert query.match_document(es4_document)

    # Five characters of content must break the equality
    es4_document.content = 'other'

    assert not query.match_document(es4_document)
def test_match_content_length_gt(es4_document: Es4Document) -> None:
    """Test if the content_length filter works correctly with the GT comparator"""

    longer_than_four = QueryFilter(content_length=4, content_length_comp=Cmp.GT)
    query = Query(flt=longer_than_four)

    # The untouched fixture document is expected not to match
    assert not query.match_document(es4_document)

    # Five characters of content must satisfy the filter
    es4_document.content = 'other'

    assert query.match_document(es4_document)
def test_match_content_length_lt(es4_document: Es4Document) -> None:
    """Test if the content_length filter works correctly with the LT comparator"""

    shorter_than_four = QueryFilter(content_length=4, content_length_comp=Cmp.LT)
    query = Query(flt=shorter_than_four)

    # The untouched fixture document is expected not to match
    assert not query.match_document(es4_document)

    # Three characters of content must satisfy the filter
    es4_document.content = 'see'

    assert query.match_document(es4_document)
def test_query_filter_nothing_prediction(es4_document: Es4Document) -> None:
    """Test if a query limited to zero documents returns an empty result"""

    documents = {es4_document: {}}
    result = Query(limit=0)(documents)

    assert not result
@pytest.mark.parametrize('cmp_usage', ('implicit', 'explicit'))
def test_query_filter_path_sort(
    cmp_usage: Literal['implicit', 'explicit'],
    es4_document: Es4Document,
    identity: Identity,
) -> None:
    """Test if ordering by path works

    The ascending direction is either requested explicitly or left to the default, depending
    on ``cmp_usage``; the test expects the same order in both cases.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    query = (
        Query(order_by_field=OrderByField.PATH)
        if cmp_usage == 'implicit'
        else Query(order_by_field=OrderByField.PATH, order_direction=OrderDirection.ASC)
    )

    result = query({es4_document: {}, other_document: {}})

    # '/much.txt' is expected to come before the fixture document's path
    assert result == [other_document, es4_document]
def test_query_filter_path_sort_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if ordering by path works in descending order"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    descending_query = Query(
        order_by_field=OrderByField.PATH,
        order_direction=OrderDirection.DESC,
    )

    result = descending_query({other_document: {}, es4_document: {}})

    # The input order is the reverse of the expected output order
    assert result == [es4_document, other_document]
@pytest.mark.parametrize('cmp_usage', ('implicit', 'explicit'))
def test_query_filter_local_index_sort(
    cmp_usage: Literal['implicit', 'explicit'],
    es4_document: Es4Document,
    identity: Identity,
) -> None:
    """Test if ordering by local index works

    The ascending direction is either requested explicitly or left to the default, depending
    on ``cmp_usage``; the test expects the same order in both cases.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    query = (
        Query(order_by_field=OrderByField.LOCAL_INDEX)
        if cmp_usage == 'implicit'
        else Query(order_by_field=OrderByField.LOCAL_INDEX, order_direction=OrderDirection.ASC)
    )

    documents = {
        es4_document: {'local_index': 2},
        other_document: {'local_index': 1},
    }

    # The document with the lower local index is expected first
    assert query(documents) == [other_document, es4_document]
def test_query_filter_local_index_sort_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if ordering by local index works in descending order"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    descending_query = Query(
        order_by_field=OrderByField.LOCAL_INDEX,
        order_direction=OrderDirection.DESC,
    )

    documents = {
        other_document: {'local_index': 1},
        es4_document: {'local_index': 2},
    }

    # The document with the higher local index is expected first
    assert descending_query(documents) == [es4_document, other_document]
def test_start_after_path_asc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if start_after_path works when no sort direction is given"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.PATH,
        start_after_path=Path('/much.txt'),
    )

    result = query({other_document: {}, es4_document: {}})

    # The document sitting exactly at the start path is expected to be left out
    assert result == [es4_document]
def test_start_after_path_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if start_after_path works with descending sort"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query_options = {
        'order_by_field': OrderByField.PATH,
        'order_direction': OrderDirection.DESC,
        'start_after_path': Path('/test.txt'),
    }
    query = Query(**query_options)

    result = query({other_document: {}, es4_document: {}})

    # NOTE(review): '/test.txt' is presumably the fixture document's own path
    assert result == [other_document]
def test_start_after_local_index_asc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if start_after_local_index works when no sort direction is given"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query = Query(
        order_by_field=OrderByField.LOCAL_INDEX,
        start_after_local_index=1,
    )

    documents = {
        other_document: {'local_index': 2},
        es4_document: {'local_index': 1},
    }

    # The document sitting exactly at the start index is expected to be left out
    assert query(documents) == [other_document]
def test_start_after_local_index_desc(es4_document: Es4Document, identity: Identity) -> None:
    """Test if start_after_local_index works with descending sort"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))
    query_options = {
        'order_by_field': OrderByField.LOCAL_INDEX,
        'order_direction': OrderDirection.DESC,
        'start_after_local_index': 2,
    }
    query = Query(**query_options)

    documents = {
        other_document: {'local_index': 1},
        es4_document: {'local_index': 2},
    }

    # The document sitting exactly at the start index is expected to be left out
    assert query(documents) == [other_document]
def test_start_after_with_empty_output() -> None:
    """Test if a start position does not make the query fail when there are no documents"""

    no_documents: dict = {}
    result = Query(start_after_path=Path('/test.txt'))(no_documents)

    assert result == []
@pytest.mark.parametrize(
    'use_start_after',
    (True, False),
    ids=('with_start_after', 'without_start_after'),
)
def test_limit(use_start_after: bool, es4_document: Es4Document, identity: Identity) -> None:
    """Test if document count limiting works

    With ``use_start_after`` set, the query also gets a start path, and the single returned
    document is expected to be the one following that path. Otherwise the single returned
    document is expected to be the first one of the result list.
    """

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'))

    if use_start_after:
        # '/much.txt' is skipped as the start position, leaving the fixture document
        query = Query(start_after_path=Path('/much.txt'), limit=1)
        expected = es4_document
    else:
        query = Query(limit=1)
        expected = other_document

    assert query({es4_document: {}, other_document: {}}) == [expected]
def test_byte_limiting_not_reached(es4_document: Es4Document, identity: Identity) -> None:
    """Test if byte limiting doesn't drop anything when the documents fit within the limit"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'), content='much')
    roomy_query = Query(limit_bytes=15)

    result = roomy_query({es4_document: {}, other_document: {}})

    # Both documents are expected back
    assert result == [other_document, es4_document]
def test_byte_limiting(es4_document: Es4Document, identity: Identity) -> None:
    """Test if the result is cut short once the byte limit is reached"""

    other_document = Es4Document(identity, es4_document.share, Path('/much.txt'), content='much')
    tight_query = Query(limit_bytes=5)

    result = tight_query({es4_document: {}, other_document: {}})

    # Only the first document is expected to fit within five bytes
    assert result == [other_document]
def test_byte_limiting_with_start_after(es4_document: Es4Document, identity: Identity) -> None:
    """Test if byte limiting works as expected when not starting from the beginning"""

    other_document1 = Es4Document(identity, es4_document.share, Path('/much1.txt'), content='much1')
    other_document2 = Es4Document(identity, es4_document.share, Path('/much2.txt'), content='much2')

    # The start position is given as a ``Path`` object, like at every other
    # ``start_after_path`` call site in this module, instead of a bare string
    query = Query(limit_bytes=5, start_after_path=Path('/much1.txt'))

    documents = {es4_document: {}, other_document1: {}, other_document2: {}}

    # '/much1.txt' is skipped as the start position; of the rest only '/much2.txt' is
    # expected to fit within the byte limit
    assert query(documents) == [other_document2]