from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum, auto
import json
import logging
from typing import List, Literal, Mapping, Optional, Tuple, Union
from .types import Cmp
from .document import Document, IncompleteDocument
from .identity import Identity
from .path import Path
from .share import Share
from .query import HistoryMode, Query
from .format_validator import FormatValidatorBase
from .exc import EarthsnakeError, ReplicaIsClosedError, ValidationError
from .util import microsecond_now, random_id
# --------------------------------------------------
# Short alias for json.dumps; used in this module to render values in debug log messages
J = json.dumps
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# ================================================================================
class IngestEventKind(Enum):
    """The possible outcomes of an attempt to ingest a document."""

    #: The document could not be ingested
    failure = auto()
    #: The document was acceptable but ingesting it changed nothing
    nothing_happened = auto()
    #: The document was ingested
    success = auto()
class IngestEventBase:
    """Fields shared by every ingest event."""

    #: Which outcome this event describes
    kind: IngestEventKind
    #: Value reported by the replica driver's `get_max_local_index()` when the event was built
    max_local_index: int
class IngestEventFailure(IngestEventBase):
    """Ingest event describing a document that could not be ingested."""

    #: Why the ingestion failed
    reason: Literal['write_error', 'invalid_document']
    #: The exception that caused the failure, if there is one
    err: Optional[Exception]
class IngestEventNothingHappened(IngestEventBase):
    """Ingest event describing a document whose ingestion changed nothing."""

    #: Why nothing was written
    reason: Literal['obsolete_from_same_author', 'already_had_it']
    #: The document that was offered for ingestion
    doc: Document
class IngestEventSuccess(IngestEventBase):
    """Ingest event describing a document that was written to the replica."""

    #: The document as written by the driver
    doc: Document
    #: Whether the ingested document is the latest one at its path
    doc_is_latest: bool
    #: The document previously stored at the same path from the same author, if any
    prev_doc_from_same_author: Optional[Document]
    #: The document that was previously the latest at the same path, if any
    prev_latest_doc: Optional[Document]
# Any of the three event shapes an ingest attempt can produce
IngestEvent = Union[IngestEventFailure, IngestEventNothingHappened, IngestEventSuccess]
def doc_compare_newest_first(a: Document, b: Document) -> Cmp:
    """Compare two documents so that the newest one sorts first

    Sorts by timestamp DESC (newest first) and breaks ties using the signature ASC.

    :param a: the first document to compare
    :param b: the second document to compare
    :returns: the result of `compare_arrays` over the two documents' sort keys
    """
    return compare_arrays(
        [a.timestamp, a.signature],
        # The tie-breaker has to be b's own signature; comparing a.signature with itself
        # would make every pair of documents with equal timestamps look identical
        [b.timestamp, b.signature],
        ["DESC", "ASC"],
    )
class Replica:  # (IReplica):
    """A replica of a share's data, used to read, write, and synchronise data to.

    Should be closed using the `close` method when no longer being used.

    .. code-block:: python

        replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
    """

    # NOTE(review): ReplicaDriverBase, Superbus, ReplicaBusChannel and Lock are used in the
    # annotations below but no import for them is visible at the top of this file -- confirm
    # that they are in scope.

    #: Identifier generated for this replica instance
    replica_id: str  # todo: save it to the driver too, and reload it when starting up

    #: The address of the share this replica belongs to
    share: Share

    #: The validator used to validate ingested documents
    format_validator: FormatValidatorBase

    #: The driver that storage, configuration and query calls are delegated to
    replica_driver: ReplicaDriverBase

    #: The bus on which replica events are sent
    bus: Superbus[ReplicaBusChannel]

    _is_closed = False
    _ingest_lock: Lock[IngestEvent]
def __init__(
    self,
    share: Union[str, Workspace],
    validator: FormatValidatorBase,
    driver: ReplicaDriverBase,
) -> None:
    """Create a replica for a share

    :param share: the share this replica belongs to, either as an object or as its string
        form (parsed with `Workspace.from_string`)
    :param validator: the validator used to validate and sign documents
    :param driver: the driver to store data in; a driver class is also accepted, in which
        case it is instantiated with the share
    """
    if isinstance(share, str):
        share = Workspace.from_string(share)

    logger.debug('constructor. driver = %s', driver)

    # If we got a class instead of an actual driver object, lets instantiate the driver
    if isinstance(driver, type):
        driver = driver(share)

    self.replica_id = 'replica-' + random_id()
    self.share = share
    self.format_validator = validator
    self.replica_driver = driver
    self.bus = Superbus('|')
    self._ingest_lock = Lock()
# --------------------------------------------------
def is_closed(self) -> bool:
    """Returns whether the replica is closed or not

    :returns: `True` once the replica has been closed, `False` otherwise
    """
    return self._is_closed
async def close(self, erase: bool = False) -> None:
    """Closes the replica, preventing new documents from being ingested or events being emitted

    Any methods called after closing will raise `ReplicaIsClosedError`

    :param erase: Erase the contents of the replica. Defaults to `False`
    :raises ReplicaIsClosedError: if the replica has already been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    # TODO: do this all in a lock?
    logger.debug(' sending willClose blockingly...')
    await self.bus.send_and_wait("willClose")

    # From here on every guarded method refuses to run
    logger.debug(' marking self as closed...')
    self._is_closed = True

    logger.debug(' closing ReplicaDriver (erase = %s)...', erase)
    await self.replica_driver.close(erase)

    # NOTE(review): the log message says "nonblockingly" but the send below is awaited --
    # confirm which of the two is intended
    logger.debug(' sending didClose nonblockingly...')
    await self.bus.send_and_wait('didClose')

    logger.debug('...closing done')
# --------------------------------------------------
async def get_config(self, key: str) -> Optional[str]:
    """Get a specific config value

    :param key: the configuration key to look up
    :returns: whatever the replica driver returns for `key`
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    return await self.replica_driver.get_config(key)
async def set_config(self, key: str, value: str) -> None:
    """Set a specific config value

    :param key: the configuration key to set
    :param value: the value to store under `key`
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    # Delegates to the driver like the other config accessors do.
    # NOTE(review): the driver method name mirrors `get_config` -- confirm against the driver
    await self.replica_driver.set_config(key, value)
async def list_config_keys(self) -> List[str]:
    """List all available configuration keys

    :returns: the keys reported by the replica driver
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    # NOTE(review): the driver method name mirrors `get_config` -- confirm against the driver
    return await self.replica_driver.list_config_keys()
async def delete_config(self, key: str) -> bool:
    """Delete a specific config value

    :param key: the configuration key to delete
    :returns: the result reported by the replica driver
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    # NOTE(review): the driver method name mirrors `get_config` -- confirm against the driver
    return await self.replica_driver.delete_config(key)
# --------------------------------------------------
def get_max_local_index(self) -> int:
    """Returns the max local index reported by the replica driver

    :raises ReplicaIsClosedError: if the replica has been closed
    """
    if self._is_closed:
        raise ReplicaIsClosedError()

    return self.replica_driver.get_max_local_index()
async def get_docs_after_local_index(
    self,
    history_mode: HistoryMode,
    start_after: int,
    limit: Optional[int] = None,
) -> List[Document]:
    """Query the driver for documents starting after a given local index

    Results are requested in ascending local index order.

    :param history_mode: the history mode to put in the query
    :param start_after: the local index to start after
    :param limit: value for the query's `limit` field; `None` is passed through unchanged
    :returns: the documents returned by the replica driver
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    logger.debug(
        'get_docs_after_local_index(%s, %s, %d)',
        history_mode,
        start_after,
        limit or -1,  # %d cannot format None
    )

    if self._is_closed:
        raise ReplicaIsClosedError()

    query: Query = {
        'history_mode': history_mode,
        'order_by': 'localIndex ASC',
        'start_after': {
            'local_index': start_after,
        },
        'limit': limit,
    }

    return await self.replica_driver.query_docs(query)
# Query the replica driver for documents, ordered by path ascending.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): the bare timestamp lines in this block are not Python; they look like
# version-control annotation residue.
# NOTE(review): the query_docs(...) call is never closed and only `order_by` is visible;
# further arguments (presumably a history mode) seem to be missing from this copy -- restore
# them from the repository.
async def get_all_docs(self) -> List[Document]:
2022-05-03 13:27:54 +00:00
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
return await self.replica_driver.query_docs(
order_by='path ASC',
# Query the replica driver for documents, ordered by path ascending.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): the bare timestamp line in this block is not Python; it looks like
# version-control annotation residue.
# NOTE(review): as visible here the query is identical to the one in get_all_docs; the
# argument that restricts it to latest documents (presumably a history mode) seems to be
# missing from this copy, as is the closing parenthesis -- restore them from the repository.
async def get_latest_docs(self) -> List[Document]:
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
return await self.replica_driver.query_docs(
order_by='path ASC',
# Query the replica driver for the documents stored at `path`, ordered by path ascending.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): the bare timestamp lines in this block are not Python; they look like
# version-control annotation residue.
# NOTE(review): the query_docs(...) call is never closed and further arguments (presumably a
# history mode) may be missing from this copy -- restore them from the repository.
async def get_all_docs_at_path(self, path: Path) -> List[Document]:
logger.debug('get_all_docs_at_path("%s")', path)
2022-05-03 13:27:54 +00:00
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
return await self.replica_driver.query_docs(
order_by='path ASC',
# Restrict the query to the requested path
filter={'path': path},
# Query the replica driver for the documents stored at `path` and return None when there
# are none.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): the bare timestamp lines in this block are not Python; they look like
# version-control annotation residue.
# NOTE(review): no return statement is visible for the case where documents were found, the
# query_docs(...) call is never closed, and further query arguments (presumably a history
# mode) may be missing from this copy -- restore them from the repository.
async def get_latest_doc_at_path(self, path: Path) -> Optional[Document]:
logger.debug('get_latest_docs_at_path("%s")', path)
2022-05-03 13:27:54 +00:00
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
docs = await self.replica_driver.query_docs(
order_by='path ASC',
filter={'path': path},
2022-05-03 13:27:54 +00:00
# An empty result means there is no document at this path
if not docs:
return None
async def query_docs(self, query: Optional[Query] = None) -> List[Document]:
    """Returns an array of docs for a given query

    .. code-block:: python

        my_query = {
            'filter': {
                'path_ends_with': '.txt'
            },
            'limit': 5,
        }

        first_five_text_docs = await my_replica.query_docs(my_query)

    :param query: the query to hand to the replica driver; `None` is passed through unchanged
    :returns: the documents returned by the replica driver
    :raises ReplicaIsClosedError: if the replica has been closed
    """
    logger.debug('queryDocs %s', query)

    if self._is_closed:
        raise ReplicaIsClosedError()

    return await self.replica_driver.query_docs(query)
# def query_paths(query: Optional[Query]) -> List[Path]: pass
# def query_authors(query: Optional[Query]) -> List[AuthorAddress]: pass
# --------------------------------------------------
# Build a document from `doc_to_set`, sign it with `keypair` through the format validator
# and hand the signed document to `self.ingest`, returning the resulting ingest event.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): this block is incomplete in this copy. The bare timestamp lines are not
# Python (they look like version-control annotation residue), `self` is missing from the
# parameter list, the docstring is unterminated, several string literals have lost their
# enclosing `logger.debug(` call, the `else:` / `try:` lines are missing and most keyword
# arguments of the `Document(` call are gone. Restore it from the repository before
# changing the logic.
async def set(
keypair: Identity,
doc_to_set: IncompleteDocument,
) -> IngestEvent:
"""Adds a new document to the replica
If a document signed by the same identity exists at the same path, it will be overwritten.
logger.debug('set %s', doc_to_set)
2022-05-03 13:27:54 +00:00
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
'...deciding timestamp: getting latest doc at the same path (from any author)',
2022-05-03 13:27:54 +00:00
timestamp: int
2022-04-12 13:25:29 +00:00
# A caller-supplied integer timestamp is used as is
if isinstance(doc_to_set.timestamp, int):
2022-05-03 13:27:54 +00:00
timestamp = doc_to_set.timestamp
2022-04-12 13:25:29 +00:00
logger.debug('...docToSet already has a timestamp; not changing it from %d', timestamp)
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
# bump timestamp if needed to win over existing latest doc at same path
latest_doc_same_path = await self.get_latest_doc_at_path(doc_to_set.path)
2022-05-03 13:27:54 +00:00
if latest_doc_same_path is None:
timestamp = microsecond_now()
2022-04-12 13:25:29 +00:00
' existing latest doc, setting timestamp to now() = %s', timestamp
2022-05-03 13:27:54 +00:00
# At least one microsecond newer than the existing latest document, even if the local
# clock is behind it
timestamp = max(microsecond_now(), latest_doc_same_path.timestamp + 1)
2022-04-12 13:25:29 +00:00
'...existing latest doc found, bumping timestamp to win if needed = %s',
2022-05-03 13:27:54 +00:00
doc = Document(
2022-04-12 13:25:29 +00:00
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
# NOTE(review): no import for `Crypto` is visible at the top of this file -- confirm it is in scope
content_hash=await Crypto.sha256base32(doc_to_set.content),
delete_after=doc_to_set.delete_after or None,
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
signature='?', # signature will be added in just a moment
# _localIndex will be added during upsert. it's not needed for the signature.
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
logger.debug('...signing doc')
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
signed_doc = await self.format_validator.sign_document(keypair, doc)
# NOTE(review): when sign_document raises, `signed_doc` has not been assigned, so the
# 'err' entry below cannot refer to the error; the handlers in `ingest` bind the exception
# with `as exc` and return that instead -- this one probably should too.
except EarthsnakeError:
return {
'kind': 'failure',
'reason': 'invalid_document',
'err': signed_doc,
'max_local_index': self.replica_driver.get_max_local_index(),
logger.debug('...signature = %s', signed_doc.signature)
# Ingestion performs the actual validation and write
ingest_event = await self.ingest(signed_doc)
logger.debug('...done ingesting')
logger.debug('...set is done.')
return ingest_event
# Ingest an already signed document: strip its extra fields with the format validator,
# then (inside write_to_driver_with_lock) compare it with the documents already stored at
# the same path, upsert it through the replica driver and build the ingest event.
# Invalid documents are reported as a 'failure' event dict rather than raised.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): this block is incomplete in this copy. The bare timestamp lines are not
# Python (they look like version-control annotation residue), the docstring is
# unterminated, the `try:` lines and the validity-check call are missing, the list
# comprehension has lost its expressions, and the lock call, the arguments of
# `bus.send_and_wait(` and the final return are gone. Restore it from the repository
# before changing the logic.
# NOTE(review): the annotation `Doc` is not imported in the visible part of this file; the
# rest of the module uses `Document`.
async def ingest(self, doc_to_ingest: Doc) -> IngestEvent:
"""Ingest an existing signed document to the replica
logger.debug('ingest %s', doc_to_ingest)
if self._is_closed:
raise ReplicaIsClosedError()
2022-04-12 13:25:29 +00:00
logger.debug('...removing extra fields')
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
remove_results_or_err = self.format_validator.remove_extra_fields(doc_to_ingest)
2022-05-03 13:27:54 +00:00
except EarthsnakeError as exc:
2022-04-12 13:25:29 +00:00
return {
'kind': "failure",
'reason': "invalid_document",
'err': exc,
'max_local_index': self.replica_driver.get_max_local_index(),
doc_to_ingest = remove_results_or_err.doc # a copy of doc without extra fields
extra_fields = remove_results_or_err.extras # any extra fields starting with underscores
if extra_fields:
2022-04-12 13:25:29 +00:00
logger.debug('...extra fields found: %s', J(extra_fields))
2022-05-03 13:27:54 +00:00
# now actually check doc validity against core schema
2022-05-03 13:27:54 +00:00
except EarthsnakeError as exc:
2022-04-12 13:25:29 +00:00
return {
'kind': "failure",
'reason': "invalid_document",
'err': exc,
'max_local_index': self.replica_driver.get_max_local_index(),
# Everything in this inner coroutine is meant to run as one protected region
async def write_to_driver_with_lock() -> IngestEvent:
# get other docs at the same path
logger.debug(' >> ingest: start of protected region')
logger.debug(' > getting other history docs at the same path by any author')
existing_docs_same_path = await self.get_all_docs_at_path(doc_to_ingest.path)
logger.debug(' > %d', len(existing_docs_same_path))
logger.debug(' > getting prevLatest and prevSameAuthor')
prev_latest: Optional[Document] = existing_docs_same_path[0] if existing_docs_same_path else None
# NOTE(review): indexing `[0]` on the filtered list raises IndexError when nothing matches,
# so the `or None` fallback cannot take effect -- confirm the intended behaviour
prev_same_author: Optional[Document] = [
for document in existing_docs_same_path
if ==
][0] or None
logger.debug(' > checking if new doc is latest at this path')
# NOTE(review): `existing_docs_same_path[0]` raises IndexError when there is no document at
# this path yet; part of this expression may be missing from this copy
is_latest = existing_docs_same_path[0] == doc_to_ingest
2022-04-12 13:25:29 +00:00
logger.debug(' > ...isLatest: %s', is_latest)
2022-05-03 13:27:54 +00:00
if not is_latest and prev_same_author is not None:
' > new doc is not latest and there is another one from the same author...'
# check if this is obsolete or redundant from the same author
doc_comp = doc_compare_newest_first(doc_to_ingest, prev_same_author)
2022-05-03 13:27:54 +00:00
if doc_comp == Cmp.GT:
2022-04-12 13:25:29 +00:00
logger.debug(' > new doc is GT prevSameAuthor, so it is obsolete')
return {
'kind': "nothing_happened",
'reason': "obsolete_from_same_author",
'doc': doc_to_ingest,
'max_local_index': self.replica_driver.get_max_local_index(),
2022-05-03 13:27:54 +00:00
if doc_comp == Cmp.EQ:
2022-04-12 13:25:29 +00:00
' > new doc is EQ prevSameAuthor, so it is redundant (already_had_it)',
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
return {
'kind': "nothing_happened",
'reason': "already_had_it",
'doc': doc_to_ingest,
'max_local_index': self.replica_driver.get_max_local_index(),
# save it
logger.debug(" > upserting into ReplicaDriver...")
# TODO: pass existing_docs_same_path to save another lookup
doc_as_written = await self.replica_driver.upsert(doc_to_ingest)
logger.debug(" > ...done upserting into ReplicaDriver")
logger.debug(" > ...getting ReplicaDriver maxLocalIndex...")
2022-05-03 13:27:54 +00:00
max_local_index = self.replica_driver.get_max_local_index()
2022-04-12 13:25:29 +00:00
' >> ingest: end of protected region, returning a WriteEvent from the lock'
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
return {
'kind': "success",
'max_local_index': max_local_index,
'doc': doc_as_written, # with updated extra properties like _localIndex
'doc_is_latest': is_latest,
'prev_doc_from_same_author': prev_same_author,
'prev_latest_doc': prev_latest,
logger.debug(" >> ingest: running protected region...")
# NOTE(review): the awaited expression is missing; presumably write_to_driver_with_lock is
# run through self._ingest_lock -- confirm against the repository
ingest_event: IngestEvent = await
logger.debug(" >> ingest: ...done running protected region")
logger.debug("...send ingest event after releasing the lock")
logger.debug("...ingest event: %s", ingest_event)
# NOTE(review): the arguments of this call are missing, and no `return ingest_event` is
# visible after it even though callers in this module use the returned event
await self.bus.send_and_wait(
) # include the path in the channel even on failures
2022-05-03 13:27:54 +00:00
# Query every document written by `keypair.address` and, for each one that still has
# content, sign and ingest an empty replacement whose timestamp is one higher than the
# original's. Returns the number of documents overwritten.
# Raises ReplicaIsClosedError when the replica has been closed.
# NOTE(review): this block is incomplete in this copy. The bare timestamp lines are not
# Python (they look like version-control annotation residue), the docstring is
# unterminated, the `try:` line, the closing parentheses, most keyword arguments of
# `Query(` / `Document(` and the `logger.debug(` call around the final message are gone.
# Restore it from the repository before changing the logic.
async def overwrite_all_docs_by_author(self, keypair: Identity) -> int:
"""Overwrite every document from this author, including history versions, with an empty doc
:returns: the number of documents changed, or -1 if there was an error.
logger.debug('overwriteAllDocsByAuthor("%s")', keypair.address)
if self._is_closed:
raise ReplicaIsClosedError()
# TODO: do this in batches
2022-04-12 13:25:29 +00:00
query = Query(
flt={'author': keypair.address},
docs_to_overwrite = await self.query_docs(query)
logger.debug(' ...found %d docs to overwrite', len(docs_to_overwrite))
num_overwritten = 0
num_already_empty = 0
for doc in docs_to_overwrite:
# Documents without content are only counted
# NOTE(review): no `continue` is visible after the counter, so as written here an already
# empty document would still be overwritten -- confirm against the repository
if not doc.content:
num_already_empty += 1
2022-04-12 13:25:29 +00:00
# remove extra fields
cleaned_result = self.format_validator.remove_extra_fields(doc)
cleaned_doc = cleaned_result.doc
# make new doc which is empty and just barely newer than the original
2022-05-03 13:27:54 +00:00
empty_doc = Document(
2022-04-12 13:25:29 +00:00
# NOTE(review): no import for `Crypto` is visible at the top of this file -- confirm it is in scope
content_hash=await Crypto.sha256base32(''),
timestamp=doc.timestamp + 1,
2022-05-03 13:27:54 +00:00
2022-04-12 13:25:29 +00:00
# sign and ingest it
signed_doc = await self.format_validator.sign_document(keypair, empty_doc)
# NOTE(review): when sign_document raises, `signed_doc` has not been assigned, so this
# return cannot work; the docstring promises -1 on error
except EarthsnakeError:
return signed_doc
ingest_event = await self.ingest(signed_doc)
# NOTE(review): the two branches below return ValidationError instances although the
# method is annotated `-> int`, and they read `.kind` / `.reason` / `.err` as attributes
# and compare `kind` with strings, while `ingest` builds plain dicts -- confirm the
# intended event representation
if ingest_event.kind == 'failure':
return ValidationError(
f'ingestion error during overwriteAllDocsBySameAuthor: {ingest_event.reason}: {ingest_event.err}',
if ingest_event.kind == 'nothing_happened':
return ValidationError(
f'ingestion did nothing during overwriteAllDocsBySameAuthor: {ingest_event.reason}',
# success
num_overwritten += 1
' ...done; %d overwritten to be empty; %d were already empty; out of total %d docs',
2022-05-03 13:27:54 +00:00
return num_overwritten