from __future__ import annotations

from abc import ABC, abstractmethod
import asyncio
from dataclasses import dataclass
from datetime import datetime
from enum import Enum, auto
import json
import logging
from typing import List, Literal, Mapping, Optional, Tuple, Union

#import { Lock, Superbus } from "../../deps.ts";
from .types import Cmp
#import {
#    LocalIndex,
#    ShareAddress,
#} from "../util/doc-types.ts";
from .document import Document, IncompleteDocument
from .identity import Identity
from .path import Path
from .share import Share
from .query import HistoryMode, Query
#import {
#    IngestEvent,
#    IReplica,
#    IReplicaDriver,
#    ReplicaBusChannel,
#    ReplicaId,
#} from "./replica-types.ts";
#import { IFormatValidator } from "../format-validators/format-validator-types.ts";
from .format_validator import FormatValidatorBase
from .exc import EarthsnakeError, ReplicaIsClosedError, ValidationError
#import { randomId } from "../util/misc.ts";
from .util import microsecond_now, random_id
#import { compareArrays } from "./compare.ts";
#import { checkShareIsValid } from "../core-validators/addresses.ts";
#
#import { Crypto } from "../crypto/crypto.ts";
#
# --------------------------------------------------
#
#import { Logger } from "../util/log.ts";

J = json.dumps
logger = logging.getLogger(__name__)

#
# ================================================================================


class IngestEventKind(Enum):
    failure = auto()
    nothing_happened = auto()
    success = auto()


@dataclass
class IngestEventBase:
    kind: IngestEventKind
    max_local_index: int


@dataclass
class IngestEventFailure(IngestEventBase):
    reason: Literal['write_error', 'invalid_document']
    err: Optional[Exception]


@dataclass
class IngestEventNothingHappened(IngestEventBase):
    reason: Literal['obsolete_from_same_author', 'already_had_it']
    doc: Document


@dataclass
class IngestEventSuccess(IngestEventBase):
    doc: Document
    doc_is_latest: bool
    prev_doc_from_same_author: Optional[Document]
    prev_latest_doc: Optional[Document]


IngestEvent = Union[IngestEventFailure, IngestEventNothingHappened, IngestEventSuccess]


def doc_compare_newest_first(a: Document, b: Document) -> Cmp:
    """Sort by timestamp DESC (newest first) and break ties using the signature ASC"""
    if a.timestamp != b.timestamp:
        return Cmp.LT if a.timestamp > b.timestamp else Cmp.GT

    if a.signature != b.signature:
        return Cmp.LT if a.signature < b.signature else Cmp.GT

    return Cmp.EQ


class Replica:  # (IReplica):
    """A replica of a share's data, used to read, write, and synchronise data with other replicas

    Should be closed using the `close` method when no longer being used.

    .. code-block:: python

        replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
    """

    replica_id: str  # TODO: save it to the driver too, and reload it when starting up

    #: The address of the share this replica belongs to
    share: Share

    #: The validator used to validate ingested documents
    format_validator: FormatValidatorBase

    # NOTE: ReplicaDriverBase, Superbus and ReplicaBusChannel come from parts of the
    # TypeScript original (replica-types.ts and deps.ts) that are not ported yet;
    # `from __future__ import annotations` keeps these annotations unevaluated.
    replica_driver: ReplicaDriverBase
    bus: Superbus[ReplicaBusChannel]

    _is_closed = False
    _ingest_lock: asyncio.Lock

    def __init__(
        self,
        share: Union[str, Share],
        validator: FormatValidatorBase,
        driver: ReplicaDriverBase,
    ) -> None:
        if isinstance(share, str):
            share = Share.from_string(share)

        logger.debug(
            'constructor. '
            '  driver = %s',
            driver.__class__.__name__,
        )

        # If we got a class instead of an actual driver object, let's instantiate the driver
        if isinstance(driver, type):
            driver = driver(share)

        self.replica_id = 'replica-' + random_id()
        self.share = share
        self.format_validator = validator
        self.replica_driver = driver
        # TODO: Superbus is the event bus used by the TypeScript original; this call
        # will fail until a Python equivalent is ported.
        self.bus = Superbus('|')
        self._ingest_lock = asyncio.Lock()

    # --------------------------------------------------
    # LIFECYCLE

    def is_closed(self) -> bool:
        """Return whether the replica is closed or not"""
        return self._is_closed

    async def close(self, erase: bool = False) -> None:
        """Close the replica, preventing new documents from being ingested or events from being emitted

        Any method called after closing will raise `ReplicaIsClosedError`.

        :param erase: Erase the contents of the replica. Defaults to `False`
        """
        logger.debug('closing...')

        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this all in a lock?
        logger.debug(' sending willClose blockingly...')
        await self.bus.send_and_wait('willClose')

        logger.debug(' marking self as closed...')
        self._is_closed = True

        logger.debug(' closing ReplicaDriver (erase = %s)...', erase)
        await self.replica_driver.close(erase)

        logger.debug(' sending didClose...')
        await self.bus.send_and_wait('didClose')

        logger.debug('...closing done')

    # --------------------------------------------------
    # CONFIG

    async def get_config(self, key: str) -> Optional[str]:
        """Get a specific config value"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.get_config(key)

    async def set_config(self, key: str, value: str) -> None:
        """Set a specific configuration value"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.set_config(key, value)

    async def list_config_keys(self) -> List[str]:
        """List all available configuration keys"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.list_config_keys()

    async def delete_config(self, key: str) -> bool:
        """Delete a key from the configuration"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.delete_config(key)

    # --------------------------------------------------
    # GET

    def get_max_local_index(self) -> int:
        """Return the max local index of all stored documents"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return self.replica_driver.get_max_local_index()

    async def get_docs_after_local_index(
        self,
        history_mode: HistoryMode,
        start_after: int,
        limit: Optional[int] = None,
    ) -> List[Document]:
        """Get all documents after a specific index"""
        logger.debug(
            'get_docs_after_local_index(%s, %s, %d)',
            history_mode,
            start_after,
            limit or -1,
        )

        if self._is_closed:
            raise ReplicaIsClosedError()

        query: Query = {
            'history_mode': history_mode,
            'order_by': 'localIndex ASC',
            'start_after': {
                'local_index': start_after,
            },
            'limit': limit,
        }

        return await self.replica_driver.query_docs(query)

    async def get_all_docs(self) -> List[Document]:
        """Return all documents, including historical versions of documents by other identities"""
        logger.debug('get_all_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.ALL,
            order_by='path ASC',
        )

    async def get_latest_docs(self) -> List[Document]:
        """Return the latest document from every path"""
        logger.debug('get_latest_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.LATEST,
            order_by='path ASC',
        )

    async def get_all_docs_at_path(
        self,
        path: Path,
    ) -> List[Document]:
        """Return all versions of a document by different authors from a specific path"""
        logger.debug('get_all_docs_at_path("%s")', path)

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.ALL,
            order_by='path ASC',
            filter={'path': path},
        )

    async def get_latest_doc_at_path(self, path: Path) -> Optional[Document]:
        """Return the most recently written version of a document at a path"""
        logger.debug('get_latest_doc_at_path("%s")', path)

        if self._is_closed:
            raise ReplicaIsClosedError()

        docs = await self.replica_driver.query_docs(
            history_mode=HistoryMode.LATEST,
            order_by='path ASC',
            filter={'path': path},
        )

        if not docs:
            return None

        return docs[0]

    async def query_docs(self, query: Optional[Query] = None) -> List[Document]:
        """Return a list of docs for a given query

        .. code-block:: python

            my_query = {
                'filter': {
                    'path_ends_with': '.txt',
                },
                'limit': 5,
            }

            first_five_text_docs = await my_replica.query_docs(my_query)
        """
        logger.debug('query_docs %s', query)

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(query)

    # def query_paths(query: Optional[Query]) -> List[Path]: pass
    # def query_authors(query: Optional[Query]) -> List[AuthorAddress]: pass

    # --------------------------------------------------
    # SET

    async def set(
        self,
        keypair: Identity,
        doc_to_set: IncompleteDocument,
    ) -> IngestEvent:
        """Add a new document to the replica

        If a document signed by the same identity exists at the same path, it will be overwritten.
        """
        logger.debug('set %s', doc_to_set)

        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug(
            '...deciding timestamp: getting latest doc at the same path (from any author)',
        )

        timestamp: int

        if isinstance(doc_to_set.timestamp, int):
            timestamp = doc_to_set.timestamp
            logger.debug('...docToSet already has a timestamp; not changing it from %d', timestamp)
        else:
            # bump timestamp if needed to win over existing latest doc at same path
            latest_doc_same_path = await self.get_latest_doc_at_path(doc_to_set.path)

            if latest_doc_same_path is None:
                timestamp = microsecond_now()
                logger.debug(
                    '...no existing latest doc, setting timestamp to now() = %s', timestamp,
                )
            else:
                timestamp = max(microsecond_now(), latest_doc_same_path.timestamp + 1)
                logger.debug(
                    '...existing latest doc found, bumping timestamp to win if needed = %s',
                    timestamp,
                )

        # TODO: ValidationFormat and Crypto are still unported pieces of the TypeScript
        # original (the format constant and the hashing helpers); these references will
        # fail until Python equivalents are available.
        doc = Document(
            format=ValidationFormat.ES4,
            author=keypair.address,
            content=doc_to_set.content,
            content_hash=await Crypto.sha256base32(doc_to_set.content),
            delete_after=doc_to_set.delete_after or None,
            path=doc_to_set.path,
            timestamp=timestamp,
            workspace=self.share,
            signature='?',  # signature will be added in just a moment
            # _localIndex will be added during upsert; it's not needed for the signature.
        )

        logger.debug('...signing doc')

        try:
            signed_doc = await self.format_validator.sign_document(keypair, doc)
        except EarthsnakeError as exc:
            return IngestEventFailure(
                kind=IngestEventKind.failure,
                max_local_index=self.replica_driver.get_max_local_index(),
                reason='invalid_document',
                err=exc,
            )

        logger.debug('...signature = %s', signed_doc.signature)

        logger.debug('...ingesting')
        logger.debug('-----------------------')

        ingest_event = await self.ingest(signed_doc)

        logger.debug('-----------------------')
        logger.debug('...done ingesting')
        logger.debug('...set is done.')

        return ingest_event

    async def ingest(self, doc_to_ingest: Document) -> IngestEvent:
        """Ingest an existing signed document to the replica"""
        logger.debug('ingest %s', doc_to_ingest)

        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug('...removing extra fields')

        try:
            remove_results_or_err = self.format_validator.remove_extra_fields(doc_to_ingest)
        except EarthsnakeError as exc:
            return IngestEventFailure(
                kind=IngestEventKind.failure,
                max_local_index=self.replica_driver.get_max_local_index(),
                reason='invalid_document',
                err=exc,
            )

        doc_to_ingest = remove_results_or_err.doc  # a copy of doc without extra fields
        extra_fields = remove_results_or_err.extras  # any extra fields starting with underscores

        if extra_fields:
            logger.debug('...extra fields found: %s', J(extra_fields))

        try:
            # now actually check doc validity against core schema
            self.format_validator.check_document_is_valid(doc_to_ingest)
        except EarthsnakeError as exc:
            return IngestEventFailure(
                kind=IngestEventKind.failure,
                max_local_index=self.replica_driver.get_max_local_index(),
                reason='invalid_document',
                err=exc,
            )

        async def write_to_driver_with_lock() -> IngestEvent:
            # get other docs at the same path
            logger.debug(' >> ingest: start of protected region')
            logger.debug(' > getting other history docs at the same path by any author')
            existing_docs_same_path = await self.get_all_docs_at_path(doc_to_ingest.path)
            logger.debug(' > ...got %d', len(existing_docs_same_path))

            logger.debug(' > getting prevLatest and prevSameAuthor')
            prev_latest: Optional[Document] = (
                existing_docs_same_path[0] if existing_docs_same_path else None
            )
            prev_same_author: Optional[Document] = next(
                (
                    document
                    for document in existing_docs_same_path
                    if document.author == doc_to_ingest.author
                ),
                None,
            )

            logger.debug(' > checking if new doc is latest at this path')
            existing_docs_same_path.append(doc_to_ingest)
            # newest first: timestamp DESC, signature ASC (same ordering as doc_compare_newest_first)
            existing_docs_same_path.sort(key=lambda d: (-d.timestamp, d.signature))
            is_latest = existing_docs_same_path[0] == doc_to_ingest
            logger.debug(' > ...isLatest: %s', is_latest)

            if not is_latest and prev_same_author is not None:
                logger.debug(
                    ' > new doc is not latest and there is another one from the same author...'
                )

                # check if this is obsolete or redundant from the same author
                doc_comp = doc_compare_newest_first(doc_to_ingest, prev_same_author)

                if doc_comp == Cmp.GT:
                    logger.debug(' > new doc is GT prevSameAuthor, so it is obsolete')

                    return IngestEventNothingHappened(
                        kind=IngestEventKind.nothing_happened,
                        max_local_index=self.replica_driver.get_max_local_index(),
                        reason='obsolete_from_same_author',
                        doc=doc_to_ingest,
                    )

                if doc_comp == Cmp.EQ:
                    logger.debug(
                        ' > new doc is EQ prevSameAuthor, so it is redundant (already_had_it)',
                    )

                    return IngestEventNothingHappened(
                        kind=IngestEventKind.nothing_happened,
                        max_local_index=self.replica_driver.get_max_local_index(),
                        reason='already_had_it',
                        doc=doc_to_ingest,
                    )

            # save it
            logger.debug(' > upserting into ReplicaDriver...')
            # TODO: pass existing_docs_same_path to save another lookup
            doc_as_written = await self.replica_driver.upsert(doc_to_ingest)
            logger.debug(' > ...done upserting into ReplicaDriver')

            logger.debug(' > ...getting ReplicaDriver maxLocalIndex...')
            max_local_index = self.replica_driver.get_max_local_index()

            logger.debug(
                ' >> ingest: end of protected region, returning a WriteEvent from the lock'
            )

            return IngestEventSuccess(
                kind=IngestEventKind.success,
                max_local_index=max_local_index,
                doc=doc_as_written,  # with updated extra properties like _localIndex
                doc_is_latest=is_latest,
                prev_doc_from_same_author=prev_same_author,
                prev_latest_doc=prev_latest,
            )

        logger.debug(' >> ingest: running protected region...')

        # the TypeScript original runs this through a Lock's `run` method; an
        # asyncio.Lock around the awaited helper gives the same mutual exclusion
        async with self._ingest_lock:
            ingest_event: IngestEvent = await write_to_driver_with_lock()

        logger.debug(' >> ingest: ...done running protected region')

        logger.debug('...send ingest event after releasing the lock')
        logger.debug('...ingest event: %s', ingest_event)
        await self.bus.send_and_wait(
            f'ingest|{doc_to_ingest.path}',
            ingest_event,
        )  # include the path in the channel even on failures

        return ingest_event

    async def overwrite_all_docs_by_author(self, keypair: Identity) -> int:
        """Overwrite every document from this author, including history versions, with an empty doc

        :returns: the number of documents overwritten
        :raises ValidationError: if any of the replacement documents fails to ingest
        """
        logger.debug('overwriteAllDocsByAuthor("%s")', keypair.address)

        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this in batches
        query = Query(
            flt={'author': keypair.address},
            history_mode=HistoryMode.ALL,
        )
        docs_to_overwrite = await self.query_docs(query)
        logger.debug(' ...found %d docs to overwrite', len(docs_to_overwrite))

        num_overwritten = 0
        num_already_empty = 0

        for doc in docs_to_overwrite:
            if not doc.content:
                num_already_empty += 1

                continue

            # remove extra fields
            cleaned_result = self.format_validator.remove_extra_fields(doc)
            cleaned_doc = cleaned_result.doc

            # make new doc which is empty and just barely newer than the original
            empty_doc = Document(
                cleaned_doc,
                content='',
                content_hash=await Crypto.sha256base32(''),
                timestamp=doc.timestamp + 1,
                signature='?',
            )

            # sign and ingest it; a signing failure propagates as an EarthsnakeError
            signed_doc = await self.format_validator.sign_document(keypair, empty_doc)

            ingest_event = await self.ingest(signed_doc)

            if ingest_event.kind == IngestEventKind.failure:
                raise ValidationError(
                    'ingestion error during overwriteAllDocsBySameAuthor: '
                    f'{ingest_event.reason}: {ingest_event.err}',
                )

            if ingest_event.kind == IngestEventKind.nothing_happened:
                raise ValidationError(
                    'ingestion did nothing during overwriteAllDocsBySameAuthor: '
                    f'{ingest_event.reason}',
                )

            # success
            num_overwritten += 1

        logger.debug(
            ' ...done; %d overwritten to be empty; %d were already empty; out of total %d docs',
            num_overwritten,
            num_already_empty,
            len(docs_to_overwrite),
        )

        return num_overwritten
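

# --------------------------------------------------
# For reference, the replica-driver contract assumed by the calls above.  This is a
# sketch inferred purely from how `Replica` uses `self.replica_driver` in this file;
# the real `ReplicaDriverBase` belongs to the not-yet-ported replica-types module and
# may differ.
#
#     get_max_local_index() -> int                 # synchronous
#     await get_config(key) -> Optional[str]
#     await set_config(key, value) -> None
#     await list_config_keys() -> List[str]
#     await delete_config(key) -> bool
#     await query_docs(query) -> List[Document]
#     await upsert(doc) -> Document
#     await close(erase) -> None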
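

# --------------------------------------------------
# Example usage (a minimal sketch, not part of the module's API).  The driver and
# validator below (`MemoryReplicaDriver`, `FormatValidatorEs4`) and the
# `Identity.generate` / `IncompleteDocument(...)` constructors are illustrative
# assumptions; only the `Replica` methods themselves are defined in this module.
#
#     import asyncio
#
#     async def demo() -> None:
#         replica = Replica('+gardening.abc123', FormatValidatorEs4(), MemoryReplicaDriver())
#         keypair = Identity.generate('suzy')
#         await replica.set(
#             keypair,
#             IncompleteDocument(path=Path('/posts/hello.txt'), content='hello world'),
#         )
#         latest = await replica.get_latest_doc_at_path(Path('/posts/hello.txt'))
#         print(latest.content if latest else 'not found')
#         await replica.close(erase=False)
#
#     asyncio.run(demo())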