from enum import Enum, auto
from functools import cmp_to_key
import json
import logging
from typing import List, Literal, Optional, Union

# TODO: port Lock and Superbus from "../../deps.ts"
from .types import Cmp
# TODO: port LocalIndex and ShareAddress from "../util/doc-types.ts"
from .document import Document, IncompleteDocument
from .identity import Identity
from .path import Path
from .share import Share
from .query import HistoryMode, Query
# TODO: port IngestEvent, IReplica, IReplicaDriver, ReplicaBusChannel and
# ReplicaId from "./replica-types.ts"
# TODO: port IFormatValidator from "../format-validators/format-validator-types.ts"
from .format_validator import FormatValidatorBase
from .exc import EarthsnakeError, ReplicaIsClosedError, ValidationError
from .util import microsecond_now, random_id
from .compare import compare_arrays  # port of "./compare.ts" (assumed module name)
# TODO: port checkShareIsValid from "../core-validators/addresses.ts"
# TODO: port Crypto from "../crypto/crypto.ts"

# --------------------------------------------------

J = json.dumps
logger = logging.getLogger(__name__)

# ================================================================================


class IngestEventKind(Enum):
    failure = auto()
    nothing_happened = auto()
    success = auto()


class IngestEventBase:
    kind: IngestEventKind
    max_local_index: int


class IngestEventFailure(IngestEventBase):
    reason: Literal['write_error', 'invalid_document']
    err: Optional[Exception]


class IngestEventNothingHappened(IngestEventBase):
    reason: Literal['obsolete_from_same_author', 'already_had_it']
    doc: Document


class IngestEventSuccess(IngestEventBase):
    doc: Document
    doc_is_latest: bool
    prev_doc_from_same_author: Optional[Document]
    prev_latest_doc: Optional[Document]


IngestEvent = Union[IngestEventFailure, IngestEventNothingHappened, IngestEventSuccess]


def doc_compare_newest_first(a: Document, b: Document) -> Cmp:
    # Sorts by timestamp DESC (newest first) and breaks ties using the signature ASC.
    return compare_arrays(
        [a.timestamp, a.signature],
        [b.timestamp, b.signature],
        ['DESC', 'ASC'],
    )
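
# Note: list.sort expects a key function rather than a comparator, so call sites
# use functools.cmp_to_key, e.g. (illustrative):
#
#     docs.sort(key=cmp_to_key(doc_compare_newest_first))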


class Replica:  # (IReplica):
    """A replica of a share's data, used to read, write, and synchronise data to and from it

    Should be closed using the `close` method when no longer being used.

    .. code-block:: python

        replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
    """

    replica_id: str  # TODO: save it to the driver too, and reload it when starting up

    #: The address of the share this replica belongs to
    share: Share

    #: The validator used to validate ingested documents
    format_validator: FormatValidatorBase
    replica_driver: 'ReplicaDriverBase'  # TODO: ReplicaDriverBase is not ported yet
    bus: 'Superbus[ReplicaBusChannel]'  # TODO: Superbus is not ported yet

    _is_closed = False
    _ingest_lock: 'Lock[IngestEvent]'  # TODO: Lock is not ported yet

    def __init__(
        self,
        share: Union[str, Share],
        validator: FormatValidatorBase,
        driver: 'ReplicaDriverBase',
    ) -> None:
        if isinstance(share, str):
            share = Share.from_string(share)

        logger.debug(
            'constructor. driver = %s',
            driver.__class__.__name__,
        )

        # If we got a class instead of an actual driver object, let's instantiate the driver
        if isinstance(driver, type):
            driver = driver(share)

        self.replica_id = 'replica-' + random_id()
        self.share = share
        self.format_validator = validator
        self.replica_driver = driver
        self.bus = Superbus('|')  # TODO: Superbus is not ported yet
        self._ingest_lock = Lock()  # TODO: Lock is not ported yet

    # --------------------------------------------------
    # LIFECYCLE

    def is_closed(self) -> bool:
        """Returns whether the replica is closed or not"""

        return self._is_closed

    async def close(self, erase: bool) -> None:
        """Closes the replica, preventing new documents from being ingested or events being emitted

        Any methods called after closing will raise `ReplicaIsClosedError`.

        :param erase: if `True`, erase the contents of the replica before closing
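
        For example (a sketch; `MemoryReplicaDriver` is the driver name used in the
        class-level example above):

        .. code-block:: python

            replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
            # ... read and write documents ...
            await replica.close(erase=False)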
"""
|
||
|
||
logger.debug('closing...')
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
# TODO: do this all in a lock?
|
||
logger.debug(' sending willClose blockingly...')
|
||
await self.bus.send_and_wait("willClose")
|
||
logger.debug(' marking self as closed...')
|
||
self._is_closed = True
|
||
logger.debug(' closing ReplicaDriver (erase = %s)...', erase)
|
||
await self.replica_driver.close(erase)
|
||
logger.debug(' sending didClose nonblockingly...')
|
||
await self.bus.send_and_wait('didClose')
|
||
logger.debug('...closing done')
|
||
|
||

    # --------------------------------------------------
    # CONFIG

    async def get_config(self, key: str) -> Optional[str]:
        """Get a specific config value"""

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.get_config(key)

    async def set_config(self, key: str, value: str) -> None:
        """Set a specific configuration value
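
        Together with `get_config`, this gives a small key-value store (a sketch;
        the key and value are illustrative):

        .. code-block:: python

            await replica.set_config('username', 'suzy')
            assert await replica.get_config('username') == 'suzy'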
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.set_config(key, value)

    async def list_config_keys(self) -> List[str]:
        """List all available configuration keys"""

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.list_config_keys()

    async def delete_config(self, key: str) -> bool:
        """Delete a key from the configuration"""

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.delete_config(key)

    # --------------------------------------------------
    # GET

    def get_max_local_index(self) -> int:
        """Returns the max local index of all stored documents"""

        if self._is_closed:
            raise ReplicaIsClosedError()

        return self.replica_driver.get_max_local_index()

    async def get_docs_after_local_index(
        self,
        history_mode: HistoryMode,
        start_after: int,
        limit: Optional[int] = None,
    ) -> List[Document]:
        """Get all documents after a specific index
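
        This is the building block of sync; for example (a sketch, values illustrative):

        .. code-block:: python

            # up to 100 docs with local_index > 42, in localIndex ASC order
            docs = await replica.get_docs_after_local_index(
                HistoryMode.ALL, start_after=42, limit=100,
            )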
"""
|
||
|
||
logger.debug(
|
||
'get_docs_after_local_index(%s, %s, %d)',
|
||
history_mode,
|
||
start_after,
|
||
limit or -1,
|
||
)
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
query: Query = {
|
||
'history_mode': history_mode,
|
||
'order_by': 'localIndex ASC',
|
||
'start_after': {
|
||
'local_index': start_after,
|
||
},
|
||
'limit': limit,
|
||
}
|
||
|
||
return await self.replica_driver.query_docs(query)
|
||
|
||

    async def get_all_docs(self) -> List[Document]:
        """Returns all documents, including historical versions of documents by other identities"""

        logger.debug('get_all_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.ALL,
            order_by='path ASC',
        )

    async def get_latest_docs(self) -> List[Document]:
        """Returns the latest document from every path"""

        logger.debug('get_latest_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.LATEST,
            order_by='path ASC',
        )

    async def get_all_docs_at_path(self, path: Path) -> List[Document]:
        """Returns all versions of a document by different authors from a specific path
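
        For example, to inspect every author's version of a document (a sketch;
        the path is illustrative):

        .. code-block:: python

            versions = await replica.get_all_docs_at_path(Path('/wiki/Strawberry'))
            for doc in versions:
                print(doc.author, doc.timestamp)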
"""
|
||
|
||
logger.debug('get_all_docs_at_path("%s")', path)
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
return await self.replica_driver.query_docs(
|
||
history_mode=HistoryMode.ALL,
|
||
order_by='path ASC',
|
||
filter={'path': path},
|
||
)
|
||
|
||
async def get_latest_doc_at_path(self, path: Path) -> Optional[Document]:
|
||
"""Returns the most recently written version of a document at a path"""
|
||
|
||
logger.debug('get_latest_docs_at_path("%s")', path)
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
docs = await self.replica_driver.query_docs(
|
||
history_mode=HistoryMode.LATEST,
|
||
order_by='path ASC',
|
||
filter={'path': path},
|
||
)
|
||
|
||
if not docs:
|
||
return None
|
||
|
||
return docs[0]
|
||
|
||

    async def query_docs(self, query: Optional[Query] = None) -> List[Document]:
        """Returns a list of docs for a given query

        .. code-block:: python

            my_query = {
                'filter': {
                    'path_ends_with': '.txt'
                },
                'limit': 5,
            }

            first_five_text_docs = await my_replica.query_docs(my_query)
        """

        logger.debug('query_docs %s', query)

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(query)

    # def query_paths(query: Optional[Query]) -> List[Path]: pass
    # def query_authors(query: Optional[Query]) -> List[AuthorAddress]: pass

    # --------------------------------------------------
    # SET

    async def set(
        self,
        keypair: Identity,
        doc_to_set: IncompleteDocument,
    ) -> IngestEvent:
        """Adds a new document to the replica

        If a document signed by the same identity exists at the same path, it will be overwritten.
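
        For example (a sketch; the `IncompleteDocument` arguments are illustrative):

        .. code-block:: python

            event = await replica.set(
                my_keypair,
                IncompleteDocument(path=Path('/about/displayName'), content='Suzy'),
            )
            assert event['kind'] == 'success'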
"""
|
||
|
||
logger.debug('set %s', doc_to_set)
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
logger.debug(
|
||
'...deciding timestamp: getting latest doc at the same path (from any author)',
|
||
)
|
||
|
||
timestamp: int
|
||
|
||
if isinstance(doc_to_set.timestamp, int):
|
||
timestamp = doc_to_set.timestamp
|
||
logger.debug('...docToSet already has a timestamp; not changing it from %d', timestamp)
|
||
else:
|
||
# bump timestamp if needed to win over existing latest doc at same path
|
||
latest_doc_same_path = await self.get_latest_doc_at_path(doc_to_set.path)
|
||
|
||
if latest_doc_same_path is None:
|
||
timestamp = microsecond_now()
|
||
logger.debug(
|
||
'...no existing latest doc, setting timestamp to now() = %s', timestamp
|
||
)
|
||
else:
|
||
timestamp = max(microsecond_now(), latest_doc_same_path.timestamp + 1)
|
||
logger.debug(
|
||
'...existing latest doc found, bumping timestamp to win if needed = %s',
|
||
timestamp,
|
||
)
|
||
|
||

        doc = Document(
            format=ValidationFormat.ES4,  # TODO: ValidationFormat is not ported yet
            author=keypair.address,
            content=doc_to_set.content,
            content_hash=await Crypto.sha256base32(doc_to_set.content),  # TODO: Crypto is not ported yet
            delete_after=doc_to_set.delete_after or None,
            path=doc_to_set.path,
            timestamp=timestamp,
            workspace=self.share,
            signature='?',  # signature will be added in just a moment
            # _localIndex will be added during upsert. it's not needed for the signature.
        )
        logger.debug('...signing doc')

        try:
            signed_doc = await self.format_validator.sign_document(keypair, doc)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        logger.debug('...signature = %s', signed_doc.signature)

        logger.debug('...ingesting')
        logger.debug('-----------------------')
        ingest_event = await self.ingest(signed_doc)
        logger.debug('-----------------------')
        logger.debug('...done ingesting')
        logger.debug('...set is done.')

        return ingest_event

    async def ingest(self, doc_to_ingest: Document) -> IngestEvent:
        """Ingest an existing signed document to the replica
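
        This is how documents received from other peers are added; `set` also ends up
        here after signing. For example (a sketch; `doc_from_peer` stands for a signed
        document received during sync):

        .. code-block:: python

            event = await replica.ingest(doc_from_peer)
            if event['kind'] == 'failure':
                logger.warning('rejected: %s', event['reason'])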
"""
|
||
|
||
logger.debug('ingest %s', doc_to_ingest)
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
logger.debug('...removing extra fields')
|
||
|
||
try:
|
||
remove_results_or_err = self.format_validator.remove_extra_fields(doc_to_ingest)
|
||
except EarthsnakeError as exc:
|
||
return {
|
||
'kind': "failure",
|
||
'reason': "invalid_document",
|
||
'err': exc,
|
||
'max_local_index': self.replica_driver.get_max_local_index(),
|
||
}
|
||
|
||
doc_to_ingest = remove_results_or_err.doc # a copy of doc without extra fields
|
||
extra_fields = remove_results_or_err.extras # any extra fields starting with underscores
|
||
if extra_fields:
|
||
logger.debug('...extra fields found: %s', J(extra_fields))
|
||
|
||
try:
|
||
# now actually check doc validity against core schema
|
||
self.format_validator.check_document_is_valid(doc_to_ingest)
|
||
except EarthsnakeError as exc:
|
||
return {
|
||
'kind': "failure",
|
||
'reason': "invalid_document",
|
||
'err': exc,
|
||
'max_local_index': self.replica_driver.get_max_local_index(),
|
||
}
|
||
|
||

        async def write_to_driver_with_lock() -> IngestEvent:
            # get other docs at the same path
            logger.debug(' >> ingest: start of protected region')
            logger.debug(' > getting other history docs at the same path by any author')
            existing_docs_same_path = await self.get_all_docs_at_path(doc_to_ingest.path)
            logger.debug(' > ...got %d', len(existing_docs_same_path))

            logger.debug(' > getting prev_latest and prev_same_author')
            prev_latest: Optional[Document] = (
                existing_docs_same_path[0] if existing_docs_same_path else None
            )
            prev_same_author: Optional[Document] = next(
                (
                    document
                    for document in existing_docs_same_path
                    if document.author == doc_to_ingest.author
                ),
                None,
            )

            logger.debug(' > checking if new doc is latest at this path')
            existing_docs_same_path.append(doc_to_ingest)
            existing_docs_same_path.sort(key=cmp_to_key(doc_compare_newest_first))
            is_latest = existing_docs_same_path[0] == doc_to_ingest
            logger.debug(' > ...is_latest: %s', is_latest)

            if not is_latest and prev_same_author is not None:
                logger.debug(
                    ' > new doc is not latest and there is another one from the same author...'
                )
                # check if this is obsolete or redundant from the same author
                doc_comp = doc_compare_newest_first(doc_to_ingest, prev_same_author)

                if doc_comp == Cmp.GT:
                    logger.debug(' > new doc is GT prev_same_author, so it is obsolete')

                    return {
                        'kind': 'nothing_happened',
                        'reason': 'obsolete_from_same_author',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

                if doc_comp == Cmp.EQ:
                    logger.debug(
                        ' > new doc is EQ prev_same_author, so it is redundant (already_had_it)',
                    )

                    return {
                        'kind': 'nothing_happened',
                        'reason': 'already_had_it',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

            # save it
            logger.debug(' > upserting into ReplicaDriver...')
            # TODO: pass existing_docs_same_path to save another lookup
            doc_as_written = await self.replica_driver.upsert(doc_to_ingest)
            logger.debug(' > ...done upserting into ReplicaDriver')
            logger.debug(' > ...getting ReplicaDriver max_local_index...')
            max_local_index = self.replica_driver.get_max_local_index()

            logger.debug(
                ' >> ingest: end of protected region, returning a WriteEvent from the lock'
            )

            return {
                'kind': 'success',
                'max_local_index': max_local_index,
                'doc': doc_as_written,  # with updated extra properties like _localIndex
                'doc_is_latest': is_latest,
                'prev_doc_from_same_author': prev_same_author,
                'prev_latest_doc': prev_latest,
            }

        logger.debug(' >> ingest: running protected region...')
        ingest_event: IngestEvent = await self._ingest_lock.run(
            write_to_driver_with_lock,
        )
logger.debug(" >> ingest: ...done running protected region")
|
||
|
||
logger.debug("...send ingest event after releasing the lock")
|
||
logger.debug("...ingest event: %s", ingest_event)
|
||
await self.bus.send_and_wait(
|
||
'ingest|{doc_to_ingest.path}',
|
||
ingest_event,
|
||
) # include the path in the channel even on failures
|
||
|
||
return ingest_event
|
||
|
||

    async def overwrite_all_docs_by_author(
        self, keypair: Identity,
    ) -> Union[int, ValidationError]:
        """Overwrite every document from this author, including history versions, with an empty doc

        :returns: the number of documents overwritten, or a `ValidationError` if one occurred
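
        This is the "delete all my data" operation. For example (a sketch):

        .. code-block:: python

            result = await replica.overwrite_all_docs_by_author(my_keypair)
            if isinstance(result, ValidationError):
                raise result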
"""
|
||
|
||
logger.debug('overwriteAllDocsByAuthor("%s")', keypair.address)
|
||
|
||
if self._is_closed:
|
||
raise ReplicaIsClosedError()
|
||
|
||
# TODO: do this in batches
|
||
query = Query(
|
||
flt={'author': keypair.address},
|
||
history_mode=HistoryMode.ALL,
|
||
)
|
||
docs_to_overwrite = await self.query_docs(query)
|
||
logger.debug(' ...found %d docs to overwrite', len(docs_to_overwrite))
|
||
num_overwritten = 0
|
||
num_already_empty = 0
|
||
|
||
for doc in docs_to_overwrite:
|
||
if not doc.content:
|
||
num_already_empty += 1
|
||
|
||
continue
|
||
|
||
# remove extra fields
|
||
cleaned_result = self.format_validator.remove_extra_fields(doc)
|
||
cleaned_doc = cleaned_result.doc
|
||
|
||
# make new doc which is empty and just barely newer than the original
|
||
empty_doc = Document(
|
||
cleaned_doc,
|
||
content='',
|
||
content_hash=await Crypto.sha256base32(''),
|
||
timestamp=doc.timestamp + 1,
|
||
signature='?',
|
||
)
|
||
|
||
try:
|
||
# sign and ingest it
|
||
signed_doc = await self.format_validator.sign_document(keypair, empty_doc)
|
||
except EarthsnakeError:
|
||
return signed_doc
|
||
|
||
ingest_event = await self.ingest(signed_doc)
|
||
|
||
if ingest_event.kind == 'failure':
|
||
return ValidationError(
|
||
f'ingestion error during overwriteAllDocsBySameAuthor: {ingest_event.reason}: {ingest_event.err}',
|
||
)
|
||
|
||
if ingest_event.kind == 'nothing_happened':
|
||
return ValidationError(
|
||
f'ingestion did nothing during overwriteAllDocsBySameAuthor: {ingest_event.reason}',
|
||
)
|
||
|
||
# success
|
||
num_overwritten += 1
|
||
|
||
logger.debug(
|
||
' ...done; %d overwritten to be empty; %d were already empty; out of total %d docs',
|
||
num_overwritten,
|
||
num_already_empty,
|
||
len(docs_to_overwrite)
|
||
)
|
||
|
||
return num_overwritten
|