earthsnake/earthsnake/query.py

270 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Document querying for replicas"""
from datetime import datetime
from enum import Enum, auto
from typing import Any, Dict, List, Optional, TypedDict
from .document import Document
from .exc import ValidationError
from .identity import Identity
from .path import Path
class HistoryMode(Enum):
    """History mode of a document query"""

    # Within this module the mode only selects the optimistic prediction of an unrestricted
    # query: ALL yields QueryVerdict.ALL, anything else yields QueryVerdict.ALL_LATEST.
    # NOTE(review): presumably ALL means "every stored version" and LATEST "only the newest
    # version per path" -- confirm against the replica implementation.
    ALL = auto()
    LATEST = auto()
class OrderByField(Enum):
    """Field by which the documents returned by a query are sorted"""

    # Sort by the string form of the document's path
    PATH = auto()

    # Sort by the ``local_index`` entry of the per-document metadata
    LOCAL_INDEX = auto()
class OrderDirection(Enum):
    """Direction in which the documents returned by a query are sorted"""

    # Ascending: smallest sort key first
    ASC = auto()

    # Descending: the sort is reversed
    DESC = auto()
class Cmp(Enum):
    """Comparison operators usable in the numeric / temporal parts of a query filter"""

    # The document's value must be equal to the filter value
    EQ = auto()

    # The document's value must be strictly less than the filter value
    LT = auto()

    # The document's value must be strictly greater than the filter value
    GT = auto()
class QueryVerdict(Enum):
    """Coarse prediction of how much of a document collection a query can return"""

    # Unrestricted query with history mode ALL
    ALL = auto()

    # Unrestricted query with any other history mode
    ALL_LATEST = auto()

    # The query has a filter, a start position or a limit, so it may return only a part
    SOME = auto()

    # The query has a count limit of zero and can never return anything
    NOTHING = auto()
class QueryFilter(TypedDict, total=False):
    """Optional conditions a document has to fulfil to be matched by a query

    Every key may be omitted; a document has to satisfy all conditions that are present.
    """

    # Exact path the document must have
    path: Path

    # String the document's path must start with
    path_prefix: str

    # String the document's path must end with
    path_suffix: str

    # Identity that must be the document's author
    author: Identity

    # Reference timestamp, compared to the document's timestamp using ``timestamp_comp``
    timestamp: datetime

    # Comparison used for ``timestamp``; Cmp.EQ is assumed when omitted
    timestamp_comp: Cmp

    # Reference content length, compared to the document's using ``content_length_comp``
    content_length: int

    # Comparison used for ``content_length``; Cmp.EQ is assumed when omitted
    content_length_comp: Cmp
class Query:  # pylint: disable=too-many-instance-attributes
    """Filter, order and paginate a collection of documents

    A query is validated and configured once in the constructor and can then be applied to
    any number of document collections by calling the instance.

    ``prediction`` holds a coarse guess (a ``QueryVerdict``) of how much of a collection the
    query can return.  ``history_mode`` is only stored and used to compute ``prediction``;
    calling the query does not consult it.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        history_mode: HistoryMode = HistoryMode.LATEST,
        order_by_field: OrderByField = OrderByField.PATH,
        order_direction: OrderDirection = OrderDirection.ASC,
        start_after_local_index: Optional[int] = None,
        start_after_path: Optional[Path] = None,
        flt: Optional[QueryFilter] = None,
        limit: Optional[int] = None,
        limit_bytes: Optional[int] = None,
    ) -> None:
        """Validate the query parameters and store them on the instance

        :param history_mode: history mode; only influences ``prediction``
        :param order_by_field: field the result is sorted by
        :param order_direction: direction the result is sorted in
        :param start_after_local_index: only return documents located after this local index
            (requires ordering by local index)
        :param start_after_path: only return documents located after this path (requires
            ordering by path)
        :param flt: conditions documents have to fulfil; ``None`` or empty matches everything
        :param limit: maximum number of documents to return
        :param limit_bytes: maximum accumulated content length of the returned documents
        :raises ValidationError: if ``limit`` is negative, if the start position contradicts
            itself or the ordering, or if the exact path of the filter is incompatible with
            its path prefix or suffix
        """

        if limit is not None and limit < 0:
            raise ValidationError('query limit must be 0 or greater')

        if start_after_local_index is not None and start_after_path is not None:
            raise ValidationError(
                'query start position should be either a local_index or a path, but not both'
            )

        if order_by_field == OrderByField.PATH and start_after_local_index is not None:
            raise ValidationError(
                'query would order by path, but instructed to start at a local index'
            )

        if order_by_field == OrderByField.LOCAL_INDEX and start_after_path is not None:
            raise ValidationError(
                'query would order by local index, but instructed to start at a path'
            )

        # TODO check if contentLength and timestamp are reasonable numbers

        if flt:
            # An exact path that contradicts the prefix/suffix could never match anything.
            # Presence is tested with ``is not None``, the same way match_document() does it.
            exact_path = flt.get('path')
            path_prefix = flt.get('path_prefix')
            path_suffix = flt.get('path_suffix')

            if exact_path is not None:
                if path_prefix is not None and not exact_path.startswith(path_prefix):
                    raise ValidationError(
                        'we were asked to match an exact path and a path prefix but they dont '
                        'match each other'
                    )

                if path_suffix is not None and not exact_path.endswith(path_suffix):
                    raise ValidationError(
                        'we were asked to match an exact path and a path suffix but they dont '
                        'match each other'
                    )

        self.history_mode = history_mode
        self.order_by_field = order_by_field
        self.order_direction = order_direction
        self.limit = limit
        self.limit_bytes = limit_bytes
        self.flt = flt
        self.start_after_local_index = start_after_local_index
        self.start_after_path = start_after_path

        if limit == 0:
            self.prediction = QueryVerdict.NOTHING
        elif (
            flt
            or start_after_local_index is not None
            or start_after_path is not None
            or limit is not None
            # A byte limit of 0 is still a restriction, so test for presence, not truthiness
            or limit_bytes is not None
        ):
            self.prediction = QueryVerdict.SOME
        elif history_mode == HistoryMode.ALL:
            self.prediction = QueryVerdict.ALL
        else:
            self.prediction = QueryVerdict.ALL_LATEST

    @staticmethod
    def _compare(actual: Any, comparator: Cmp, expected: Any) -> bool:
        """Return whether ``actual`` relates to ``expected`` the way ``comparator`` demands"""

        if comparator == Cmp.EQ:
            return bool(actual == expected)

        if comparator == Cmp.GT:
            return bool(actual > expected)

        if comparator == Cmp.LT:
            return bool(actual < expected)

        # An unknown comparator does not reject anything
        return True

    def match_document(self, document: Document) -> bool:
        """Check if document fulfils every condition of this query's filter

        Only the filter is considered; start position and limits are not applied here.

        :param document: the document to check
        :returns: ``True`` if there is no filter or the document satisfies all of its
            conditions, ``False`` otherwise
        """
        # pylint: disable=too-many-return-statements

        flt = self.flt

        if not flt:
            return True

        path = flt.get('path')
        if path is not None and document.path != path:
            return False

        path_prefix = flt.get('path_prefix')
        if path_prefix is not None and not document.path.startswith(path_prefix):
            return False

        path_suffix = flt.get('path_suffix')
        if path_suffix is not None and not document.path.endswith(path_suffix):
            return False

        author = flt.get('author')
        if author is not None and document.author != author:
            return False

        timestamp = flt.get('timestamp')
        if timestamp is not None and not self._compare(
            document.timestamp, flt.get('timestamp_comp', Cmp.EQ), timestamp
        ):
            return False

        # Presence check instead of truthiness: a content length of 0 is a valid filter value
        content_length = flt.get('content_length')
        if content_length is not None and not self._compare(
            document.content_length, flt.get('content_length_comp', Cmp.EQ), content_length
        ):
            return False

        return True

    def _is_past_cursor(self, document: Document, metadata: Dict[str, Any]) -> bool:
        """Return whether document lies strictly after the configured start position"""

        ascending = self.order_direction == OrderDirection.ASC
        descending = self.order_direction == OrderDirection.DESC

        if self.start_after_path is not None:
            if ascending and document.path > self.start_after_path:
                return True

            if descending and document.path < self.start_after_path:
                return True

        if self.start_after_local_index is not None:
            local_index = metadata['local_index']

            if ascending and local_index > self.start_after_local_index:
                return True

            if descending and local_index < self.start_after_local_index:
                return True

        return False

    def _start_index(
        self, ordered: List[Document], documents: Dict[Document, Dict[str, Any]]
    ) -> int:
        """Return the index of the first document in ``ordered`` after the start position"""

        if self.start_after_path is None and self.start_after_local_index is None:
            return 0

        for idx, document in enumerate(ordered):
            if self._is_past_cursor(document, documents[document]):
                return idx

        # Every document is at or before the start position, so nothing is left to return
        return len(ordered)

    def __call__(self, documents: Dict[Document, Dict[str, Any]]) -> List[Document]:
        """Apply the query to a collection of documents

        :param documents: mapping of each document to its metadata; the metadata needs a
            ``local_index`` entry when ordering by, or starting after, a local index
        :returns: the matching documents, sorted, starting after the configured start
            position and cut off by the count and byte limits
        """

        if self.prediction == QueryVerdict.NOTHING:
            return []

        ordered = [document for document in documents if self.match_document(document)]
        descending = self.order_direction == OrderDirection.DESC

        if self.order_by_field == OrderByField.PATH:
            ordered.sort(key=lambda document: str(document.path), reverse=descending)
        elif self.order_by_field == OrderByField.LOCAL_INDEX:
            ordered.sort(
                key=lambda document: documents[document]['local_index'],  # type: ignore
                reverse=descending,
            )
        else:  # pragma: no cover
            raise TypeError()

        start_at = self._start_index(ordered, documents)

        # Apply count limit (ie. stop after self.limit items)
        if self.limit is None:
            page = ordered[start_at:]
        else:
            page = ordered[start_at : start_at + self.limit]

        # Apply byte limit: stop before the first document that would exceed the budget
        if self.limit_bytes is not None:
            total_bytes = 0

            for idx, document in enumerate(page):
                total_bytes += document.content_length

                if total_bytes > self.limit_bytes:
                    return page[:idx]

        return page