The SSH host key has changed on 8 April, 2022 to this one: SHA256:573uTBSeh74kvOo0HJXi5ijdzRm8me27suzNEDlGyrQ
Python implementation of [Earthstar](https://earthstar-project.org/)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
earthsnake/earthsnake/replica.py

598 lines
20 KiB

from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum, auto
import json
import logging
from typing import List, Literal, Mapping, Optional, Tuple, Union
#import { Lock, Superbus } from "../../deps.ts";
from .types import Cmp
#import {
# LocalIndex,
# ShareAddress,
#} from "../util/doc-types.ts";
from .document import Document, IncompleteDocument
from .identity import Identity
from .path import Path
from .share import Share
from .query import HistoryMode, Query
#import {
# IngestEvent,
# IReplica,
# IReplicaDriver,
# ReplicaBusChannel,
# ReplicaId,
#} from "./replica-types.ts";
#import { IFormatValidator } from "../format-validators/format-validator-types.ts";
from .format_validator import FormatValidatorBase
from .exc import EarthsnakeError, ReplicaIsClosedError, ValidationError
#import { randomId } from "../util/misc.ts";
from .util import microsecond_now, random_id
#import { compareArrays } from "./compare.ts";
#import { checkShareIsValid } from "../core-validators/addresses.ts";
#
#import { Crypto } from "../crypto/crypto.ts";
#
# --------------------------------------------------
#
#import { Logger } from "../util/log.ts";
# Shorthand for serialising values to JSON; used when logging structured data
J = json.dumps
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
#
# ================================================================================
class IngestEventKind(Enum):
    """The possible outcomes of an attempt to ingest a document"""

    # The values are spelled out; they are the same ones ``auto()`` assigns (1, 2, 3).
    failure = 1
    nothing_happened = 2
    success = 3
class IngestEventBase:
    """Fields shared by every kind of ingest event"""

    # Which outcome this event describes
    kind: IngestEventKind
    # NOTE(review): presumably the replica driver's max local index at the time the event was
    # created — confirm against the code that builds these events
    max_local_index: int
class IngestEventFailure(IngestEventBase):
    """Ingest event describing a document that could not be ingested"""

    # Why the ingestion failed
    reason: Literal['write_error', 'invalid_document']
    # The error behind the failure, if there is one
    err: Optional[Exception]
class IngestEventNothingHappened(IngestEventBase):
    """Ingest event describing a document that was not stored because it changed nothing"""

    # Why nothing was stored
    reason: Literal['obsolete_from_same_author', 'already_had_it']
    # The document that was offered for ingestion
    doc: Document
class IngestEventSuccess(IngestEventBase):
    """Ingest event describing a document that was stored"""

    # The stored document
    doc: Document
    # Whether the stored document is the latest one at its path
    doc_is_latest: bool
    # The document the same author had at this path before the ingestion, if any
    prev_doc_from_same_author: Optional[Document]
    # The document that was the latest at this path before the ingestion, if any
    prev_latest_doc: Optional[Document]
# Any of the possible results of an ingest attempt
IngestEvent = Union[IngestEventFailure, IngestEventNothingHappened, IngestEventSuccess]
def doc_compare_newest_first(a: Document, b: Document) -> Cmp:
    """Compare two documents so that sorting puts the newest one first

    Documents are ordered by timestamp, descending (newest first); ties are broken using the
    signature, ascending.

    :param a: the first document
    :param b: the second document
    :returns: the result of comparing ``a`` to ``b``, as computed by ``compare_arrays``
    """

    # NOTE(review): ``compare_arrays`` is not imported in the visible part of this module (only
    # the commented-out TypeScript import of ``compareArrays`` is) — confirm it is in scope.
    return compare_arrays(
        [a.timestamp, a.signature],
        # Both elements must come from ``b``. Comparing ``a.signature`` against itself made every
        # pair of documents with equal timestamps look identical.
        [b.timestamp, b.signature],
        ["DESC", "ASC"],
    )
class Replica:  # (IReplica):
    """A replica of a share's data, used to read, write, and synchronise that data

    Should be closed using the `close` method when no longer being used.

    .. code-block:: python

        replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
    """

    replica_id: str  # todo: save it to the driver too, and reload it when starting up
    #: The address of the share this replica belongs to
    share: Share
    #: The validator used to validate ingested documents
    format_validator: FormatValidatorBase
    # The annotations below are quoted because the names they mention are not imported in the
    # visible part of this module; unquoted, they would raise ``NameError`` while the class is
    # being defined.
    #: The driver that stores and queries the documents and the configuration
    replica_driver: 'ReplicaDriverBase'
    #: The bus on which the ``willClose``, ``didClose`` and ``ingest|…`` events are sent
    bus: 'Superbus[ReplicaBusChannel]'
    _is_closed = False
    #: The lock guarding the part of ``ingest`` that writes to the driver
    _ingest_lock: 'Lock[IngestEvent]'

    def __init__(
        self,
        share: Union[str, Share],
        validator: FormatValidatorBase,
        driver: 'ReplicaDriverBase',
    ) -> None:
        """Create a replica

        :param share: the share this replica belongs to, or its address as a string
        :param validator: the validator used to sign and validate documents
        :param driver: the driver to use, or a driver class that is instantiated with the share
        """

        if isinstance(share, str):
            # NOTE(review): this used to call ``Workspace.from_string``, but ``Workspace`` is not
            # defined in this module.  Assumes ``Share`` offers the same constructor — confirm.
            share = Share.from_string(share)

        # If we got a class instead of an actual driver object, let’s instantiate the driver
        if isinstance(driver, type):
            driver = driver(share)

        # Logged after the instantiation above so the name is the driver's class, not the
        # metaclass of a driver class that was passed in
        logger.debug(
            'constructor. driver = %s',
            driver.__class__.__name__,
        )

        self.replica_id = 'replica-' + random_id()
        self.share = share
        self.format_validator = validator
        self.replica_driver = driver
        # NOTE(review): ``Superbus`` and ``Lock`` are not imported in the visible part of this
        # module (only the commented-out TypeScript import mentions them) — confirm they are
        # in scope.
        self.bus = Superbus('|')
        self._ingest_lock = Lock()

    # --------------------------------------------------
    # LIFECYCLE

    def is_closed(self) -> bool:
        """Returns whether the replica is closed or not"""

        return self._is_closed

    async def close(self, erase: bool = False) -> None:
        """Closes the replica, preventing new documents from being ingested or events being emitted

        Any methods called after closing will raise `ReplicaIsClosedError`.

        :param erase: Erase the contents of the replica. Defaults to `False`
        :raises ReplicaIsClosedError: if the replica is already closed
        """

        logger.debug('closing...')

        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this all in a lock?
        logger.debug(' sending willClose blockingly...')
        await self.bus.send_and_wait("willClose")

        logger.debug(' marking self as closed...')
        self._is_closed = True

        logger.debug(' closing ReplicaDriver (erase = %s)...', erase)
        await self.replica_driver.close(erase)

        # NOTE(review): the log message says "nonblockingly", yet the event is sent with
        # ``send_and_wait`` — confirm which one is intended.
        logger.debug(' sending didClose nonblockingly...')
        await self.bus.send_and_wait('didClose')

        logger.debug('...closing done')

    # --------------------------------------------------
    # CONFIG

    async def get_config(self, key: str) -> Optional[str]:
        """Get a specific config value

        :param key: the name of the configuration value
        :raises ReplicaIsClosedError: if the replica is closed
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.get_config(key)

    async def set_config(self, key: str, value: str) -> None:
        """Set a specific configuration value

        :param key: the name of the configuration value
        :param value: the value to store
        :raises ReplicaIsClosedError: if the replica is closed
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.set_config(key, value)

    async def list_config_keys(self) -> List[str]:
        """List all available configuration keys

        :raises ReplicaIsClosedError: if the replica is closed
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        # NOTE(review): ``list_configKeys`` does not follow the naming of the other driver
        # methods used here (``get_config``, ``set_config``, ``delete_config``) — confirm the
        # name against the driver.
        return await self.replica_driver.list_configKeys()

    async def delete_config(self, key: str) -> bool:
        """Delete a key from the configuration

        :param key: the name of the configuration value to delete
        :raises ReplicaIsClosedError: if the replica is closed
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.delete_config(key)

    # --------------------------------------------------
    # GET

    def get_max_local_index(self) -> int:
        """Returns the max local index of all stored documents

        :raises ReplicaIsClosedError: if the replica is closed
        """

        if self._is_closed:
            raise ReplicaIsClosedError()

        return self.replica_driver.get_max_local_index()

    async def get_docs_after_local_index(
        self,
        history_mode: HistoryMode,
        start_after: int,
        limit: Optional[int] = None,
    ) -> List[Document]:
        """Get all documents after a specific index

        :param history_mode: whether to return every version or only the latest ones
        :param start_after: only documents with a local index after this one are returned
        :param limit: the maximum number of documents to return, or ``None`` for no limit
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug(
            'get_docs_after_local_index(%s, %s, %d)',
            history_mode,
            start_after,
            limit or -1,
        )

        if self._is_closed:
            raise ReplicaIsClosedError()

        # NOTE(review): the query is a plain dict here, while ``overwrite_all_docs_by_author``
        # builds a ``Query`` object and the other getters pass keyword arguments — confirm which
        # form the driver expects.
        query: Query = {
            'history_mode': history_mode,
            'order_by': 'localIndex ASC',
            'start_after': {
                'local_index': start_after,
            },
            'limit': limit,
        }

        return await self.replica_driver.query_docs(query)

    async def get_all_docs(self) -> List[Document]:
        """Returns all documents, including historical versions of documents by other identities

        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('get_all_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.ALL,
            order_by='path ASC',
        )

    async def get_latest_docs(self) -> List[Document]:
        """Returns latest document from every path

        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('get_latest_docs()')

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.LATEST,
            order_by='path ASC',
        )

    async def get_all_docs_at_path(self, path: Path) -> List[Document]:
        """Returns all versions of a document by different authors from a specific path

        :param path: the path to look at
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('get_all_docs_at_path("%s")', path)

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            history_mode=HistoryMode.ALL,
            order_by='path ASC',
            filter={'path': path},
        )

    async def get_latest_doc_at_path(self, path: Path) -> Optional[Document]:
        """Returns the most recently written version of a document at a path

        :param path: the path to look at
        :returns: the document, or ``None`` if there is no document at the path
        :raises ReplicaIsClosedError: if the replica is closed
        """

        # The message used to say "get_latest_docs_at_path", which is not this method's name
        logger.debug('get_latest_doc_at_path("%s")', path)

        if self._is_closed:
            raise ReplicaIsClosedError()

        docs = await self.replica_driver.query_docs(
            history_mode=HistoryMode.LATEST,
            order_by='path ASC',
            filter={'path': path},
        )

        if not docs:
            return None

        return docs[0]

    async def query_docs(self, query: Optional[Query] = None) -> List[Document]:
        """Returns an array of docs for a given query

        .. code-block:: python

            my_query = {
                'filter': {
                    'path_ends_with': '.txt'
                },
                'limit': 5,
            }

            first_five_text_docs = await my_replica.query_docs(my_query)

        :param query: the query to run; it is handed to the driver unchanged
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('queryDocs %s', query)

        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(query)

    # def query_paths(query: Optional[Query]) -> List[Path]: pass
    # def query_authors(query: Optional[Query]) -> List[AuthorAddress]: pass

    # --------------------------------------------------
    # SET

    async def set(
        self,
        keypair: Identity,
        doc_to_set: IncompleteDocument,
    ) -> IngestEvent:
        """Adds a new document to the replica

        If a document signed by the same identity exists at the same path, it will be overwritten.

        :param keypair: the identity that signs the document
        :param doc_to_set: the document to add; if it has no timestamp, one is chosen so that
            the new document wins over the latest document at the same path
        :returns: the event describing what happened; a failure event if signing failed
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('set %s', doc_to_set)

        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug(
            '...deciding timestamp: getting latest doc at the same path (from any author)',
        )

        timestamp: int

        if isinstance(doc_to_set.timestamp, int):
            timestamp = doc_to_set.timestamp
            logger.debug('...docToSet already has a timestamp; not changing it from %d', timestamp)
        else:
            # bump timestamp if needed to win over existing latest doc at same path
            latest_doc_same_path = await self.get_latest_doc_at_path(doc_to_set.path)

            if latest_doc_same_path is None:
                timestamp = microsecond_now()
                logger.debug(
                    '...no existing latest doc, setting timestamp to now() = %s', timestamp
                )
            else:
                timestamp = max(microsecond_now(), latest_doc_same_path.timestamp + 1)
                logger.debug(
                    '...existing latest doc found, bumping timestamp to win if needed = %s',
                    timestamp,
                )

        # NOTE(review): ``ValidationFormat`` and ``Crypto`` are not imported in the visible part
        # of this module — confirm they are in scope.
        doc = Document(
            format=ValidationFormat.ES4,
            author=keypair.address,
            content=doc_to_set.content,
            content_hash=await Crypto.sha256base32(doc_to_set.content),
            delete_after=doc_to_set.delete_after or None,
            path=doc_to_set.path,
            timestamp=timestamp,
            workspace=self.share,
            signature='?',  # signature will be added in just a moment
            # _localIndex will be added during upsert. it's not needed for the signature.
        )

        logger.debug('...signing doc')

        try:
            signed_doc = await self.format_validator.sign_document(keypair, doc)
        except EarthsnakeError as exc:
            # ``signed_doc`` is not bound when signing fails; report the exception itself
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        logger.debug('...signature = %s', signed_doc.signature)

        logger.debug('...ingesting')
        logger.debug('-----------------------')
        ingest_event = await self.ingest(signed_doc)
        logger.debug('-----------------------')
        logger.debug('...done ingesting')

        logger.debug('...set is done.')

        return ingest_event

    async def ingest(self, doc_to_ingest: Document) -> IngestEvent:
        """Ingest an existing signed document to the replica

        The event describing the outcome is also sent on the bus, on a channel that includes
        the path of the document.

        :param doc_to_ingest: the signed document to ingest
        :returns: the event describing what happened
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('ingest %s', doc_to_ingest)

        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug('...removing extra fields')

        try:
            remove_results_or_err = self.format_validator.remove_extra_fields(doc_to_ingest)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        doc_to_ingest = remove_results_or_err.doc  # a copy of doc without extra fields
        extra_fields = remove_results_or_err.extras  # any extra fields starting with underscores

        if extra_fields:
            logger.debug('...extra fields found: %s', J(extra_fields))

        try:
            # now actually check doc validity against core schema
            self.format_validator.check_document_is_valid(doc_to_ingest)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        async def write_to_driver_with_lock() -> IngestEvent:
            # get other docs at the same path
            logger.debug(' >> ingest: start of protected region')
            logger.debug(' > getting other history docs at the same path by any author')
            existing_docs_same_path = await self.get_all_docs_at_path(doc_to_ingest.path)
            logger.debug(' > ...got %d', len(existing_docs_same_path))

            logger.debug(' > getting prevLatest and prevSameAuthor')
            prev_latest: Optional[Document] = (
                existing_docs_same_path[0] if existing_docs_same_path else None
            )
            # ``next`` with a default copes with the case when the author has no document at
            # this path yet; indexing a possibly empty list raised ``IndexError``
            prev_same_author: Optional[Document] = next(
                (
                    document
                    for document in existing_docs_same_path
                    if document.author == doc_to_ingest.author
                ),
                None,
            )

            logger.debug(' > checking if new doc is latest at this path')
            existing_docs_same_path.append(doc_to_ingest)
            # Same ordering as ``doc_compare_newest_first``: timestamp descending, then
            # signature ascending.  A key function is used because ``list.sort`` does not accept
            # a comparison function as a positional argument.
            existing_docs_same_path.sort(
                key=lambda document: (-document.timestamp, document.signature),
            )
            is_latest = existing_docs_same_path[0] == doc_to_ingest
            logger.debug(' > ...isLatest: %s', is_latest)

            if not is_latest and prev_same_author is not None:
                logger.debug(
                    ' > new doc is not latest and there is another one from the same author...'
                )

                # check if this is obsolete or redundant from the same author
                doc_comp = doc_compare_newest_first(doc_to_ingest, prev_same_author)

                if doc_comp == Cmp.GT:
                    logger.debug(' > new doc is GT prevSameAuthor, so it is obsolete')

                    return {
                        'kind': 'nothing_happened',
                        'reason': 'obsolete_from_same_author',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

                if doc_comp == Cmp.EQ:
                    logger.debug(
                        ' > new doc is EQ prevSameAuthor, so it is redundant (already_had_it)',
                    )

                    return {
                        'kind': 'nothing_happened',
                        'reason': 'already_had_it',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

            # save it
            logger.debug(" > upserting into ReplicaDriver...")
            # TODO: pass existing_docs_same_path to save another lookup
            doc_as_written = await self.replica_driver.upsert(doc_to_ingest)
            logger.debug(" > ...done upserting into ReplicaDriver")
            logger.debug(" > ...getting ReplicaDriver maxLocalIndex...")
            max_local_index = self.replica_driver.get_max_local_index()

            logger.debug(
                ' >> ingest: end of protected region, returning a WriteEvent from the lock'
            )

            return {
                'kind': 'success',
                'max_local_index': max_local_index,
                'doc': doc_as_written,  # with updated extra properties like _localIndex
                'doc_is_latest': is_latest,
                'prev_doc_from_same_author': prev_same_author,
                'prev_latest_doc': prev_latest,
            }

        logger.debug(" >> ingest: running protected region...")
        ingest_event: IngestEvent = await self._ingest_lock.run(
            write_to_driver_with_lock,
        )
        logger.debug(" >> ingest: ...done running protected region")

        logger.debug("...send ingest event after releasing the lock")
        logger.debug("...ingest event: %s", ingest_event)

        # The channel name needs the ``f`` prefix; without it the literal text
        # ``{doc_to_ingest.path}`` was used instead of the path.
        await self.bus.send_and_wait(
            f'ingest|{doc_to_ingest.path}',
            ingest_event,
        )  # include the path in the channel even on failures

        return ingest_event

    async def overwrite_all_docs_by_author(self, keypair: Identity) -> int:
        """Overwrite every document from this author, including history versions, with an empty doc

        Documents that are already empty are left alone.  Processing stops at the first
        document that cannot be signed or ingested; the error is logged.

        :param keypair: the identity whose documents are overwritten
        :returns: the number of documents changed, or -1 if there was an error.
        :raises ReplicaIsClosedError: if the replica is closed
        """

        logger.debug('overwriteAllDocsByAuthor("%s")', keypair.address)

        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this in batches
        query = Query(
            flt={'author': keypair.address},
            history_mode=HistoryMode.ALL,
        )
        docs_to_overwrite = await self.query_docs(query)
        logger.debug(' ...found %d docs to overwrite', len(docs_to_overwrite))

        num_overwritten = 0
        num_already_empty = 0

        for doc in docs_to_overwrite:
            if not doc.content:
                num_already_empty += 1

                continue

            # remove extra fields
            cleaned_result = self.format_validator.remove_extra_fields(doc)
            cleaned_doc = cleaned_result.doc

            # make new doc which is empty and just barely newer than the original
            # NOTE(review): this assumes ``Document`` accepts an existing document as its first
            # positional argument and ``Crypto`` is in scope — confirm both.
            empty_doc = Document(
                cleaned_doc,
                content='',
                content_hash=await Crypto.sha256base32(''),
                timestamp=doc.timestamp + 1,
                signature='?',
            )

            try:
                # sign and ingest it
                signed_doc = await self.format_validator.sign_document(keypair, empty_doc)
            except EarthsnakeError as exc:
                # ``signed_doc`` is not bound when signing fails; report the failure as -1
                logger.error('signing error during overwriteAllDocsBySameAuthor: %s', exc)

                return -1

            ingest_event = await self.ingest(signed_doc)

            # ``ingest`` returns its events as dicts, so the fields are read by key
            if ingest_event['kind'] == 'failure':
                logger.error(
                    'ingestion error during overwriteAllDocsBySameAuthor: %s: %s',
                    ingest_event['reason'],
                    ingest_event['err'],
                )

                return -1

            if ingest_event['kind'] == 'nothing_happened':
                logger.error(
                    'ingestion did nothing during overwriteAllDocsBySameAuthor: %s',
                    ingest_event['reason'],
                )

                return -1

            # success
            num_overwritten += 1

        logger.debug(
            ' ...done; %d overwritten to be empty; %d were already empty; out of total %d docs',
            num_overwritten,
            num_already_empty,
            len(docs_to_overwrite)
        )

        return num_overwritten