Python implementation of [Earthstar](https://earthstar-project.org/)

earthsnake/earthsnake/replica.py
from abc import ABC, abstractmethod
from datetime import datetime
from enum import Enum, auto
from functools import cmp_to_key
import json
import logging
from typing import List, Literal, Mapping, Optional, Tuple, Union
#import { Lock, Superbus } from "../../deps.ts";
from .types import Cmp
#import {
# LocalIndex,
# ShareAddress,
#} from "../util/doc-types.ts";
from .document import Document, IncompleteDocument
from .identity import Identity
from .path import Path
from .share import Share
from .query import HistoryMode, Query
#import {
# IngestEvent,
# IReplica,
# IReplicaDriver,
# ReplicaBusChannel,
# ReplicaId,
#} from "./replica-types.ts";
#import { IFormatValidator } from "../format-validators/format-validator-types.ts";
from .format_validator import FormatValidatorBase
from .exc import EarthsnakeError, ReplicaIsClosedError, ValidationError
#import { randomId } from "../util/misc.ts";
from .util import microsecond_now, random_id
#import { compareArrays } from "./compare.ts";
from .compare import compare_arrays  # assumed location of the ported compareArrays helper
#import { checkShareIsValid } from "../core-validators/addresses.ts";
#
#import { Crypto } from "../crypto/crypto.ts";
#
# --------------------------------------------------
#
#import { Logger } from "../util/log.ts";
J = json.dumps
logger = logging.getLogger(__name__)
#
# ================================================================================
class IngestEventKind(Enum):
failure = auto()
nothing_happened = auto()
    success = auto()


class IngestEventBase:
kind: IngestEventKind
    max_local_index: int


class IngestEventFailure(IngestEventBase):
reason: Literal['write_error', 'invalid_document']
    err: Optional[Exception]


class IngestEventNothingHappened(IngestEventBase):
reason: Literal['obsolete_from_same_author', 'already_had_it']
    doc: Document


class IngestEventSuccess(IngestEventBase):
doc: Document
doc_is_latest: bool
prev_doc_from_same_author: Optional[Document]
    prev_latest_doc: Optional[Document]


IngestEvent = Union[IngestEventFailure, IngestEventNothingHappened, IngestEventSuccess]


def doc_compare_newest_first(a: Document, b: Document) -> Cmp:
    # Sorts by timestamp DESC (newest first) and breaks ties using the signature ASC.
    return compare_arrays(
        [a.timestamp, a.signature],
        [b.timestamp, b.signature],
        ['DESC', 'ASC'],
    )
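# Usage sketch for doc_compare_newest_first: list.sort() only accepts a key
# function, so this Cmp-style comparator goes through functools.cmp_to_key
# (imported above); `docs` stands in for any list of Document instances.
#
#     docs.sort(key=cmp_to_key(doc_compare_newest_first))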
class Replica:  # (IReplica):
    """A replica of a share's data, used to read, write, and synchronise data with other replicas

    Should be closed using the `close` method when no longer being used.

    .. code-block:: python

        replica = Replica('+a.a123', FormatValidator.ES4, MemoryReplicaDriver())
    """
replica_id: str # todo: save it to the driver too, and reload it when starting up
#: The address of the share this replica belongs to
share: Share
#: The validator used to validate ingested documents
format_validator: FormatValidatorBase
replica_driver: ReplicaDriverBase
bus: Superbus[ReplicaBusChannel]
_is_closed = False
_ingest_lock: Lock[IngestEvent]
def __init__(
self,
        share: Union[str, Share],
validator: FormatValidatorBase,
driver: ReplicaDriverBase,
) -> None:
if isinstance(share, str):
            share = Share.from_string(share)
logger.debug(
'constructor. driver = %s',
driver.__class__.__name__,
)
# If we got a class instead of an actual driver object, let’s instantiate the driver
if isinstance(driver, type):
driver = driver(share)
self.replica_id = 'replica-' + random_id()
self.share = share
self.format_validator = validator
self.replica_driver = driver
self.bus = Superbus('|')
self._ingest_lock = Lock()
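    # Construction sketch: the docstring example above passes the share address
    # as a string and the driver as an instance; a driver class works too,
    # since __init__ instantiates it with the share:
    #
    #     replica = Replica('+gardening.abc123', validator, MemoryReplicaDriver)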
    # --------------------------------------------------
    # LIFECYCLE

    def is_closed(self) -> bool:
        """Returns whether the replica is closed or not"""
        return self._is_closed

    async def close(self, erase: bool = False) -> None:
        """Closes the replica, preventing new documents from being ingested or events being emitted

        Any method called after closing will raise `ReplicaIsClosedError`.

        :param erase: erase the contents of the replica.  Defaults to `False`
        """
        logger.debug('closing...')
        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this all in a lock?
        logger.debug('  sending willClose blockingly...')
        await self.bus.send_and_wait('willClose')
        logger.debug('  marking self as closed...')
        self._is_closed = True
        logger.debug('  closing ReplicaDriver (erase = %s)...', erase)
        await self.replica_driver.close(erase)
        logger.debug('  sending didClose...')
        await self.bus.send_and_wait('didClose')
        logger.debug('...closing done')

    # --------------------------------------------------
    # CONFIG

    async def get_config(self, key: str) -> Optional[str]:
        """Get a specific config value"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.get_config(key)

    async def set_config(self, key: str, value: str) -> None:
        """Set a specific configuration value"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.set_config(key, value)

    async def list_config_keys(self) -> List[str]:
        """List all available configuration keys"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.list_config_keys()

    async def delete_config(self, key: str) -> bool:
        """Delete a key from the configuration"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.delete_config(key)
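    # Usage sketch for the config API; the key/value namespace is free-form
    # strings, so the names here are arbitrary:
    #
    #     await replica.set_config('display-name', 'my gardening replica')
    #     name = await replica.get_config('display-name')
    #     await replica.delete_config('display-name')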
    # --------------------------------------------------
    # GET

    def get_max_local_index(self) -> int:
        """Returns the max local index of all stored documents"""
        if self._is_closed:
            raise ReplicaIsClosedError()

        return self.replica_driver.get_max_local_index()

    async def get_docs_after_local_index(
        self,
        history_mode: HistoryMode,
        start_after: int,
        limit: Optional[int] = None,
    ) -> List[Document]:
        """Get all documents after a specific index"""
        logger.debug(
            'get_docs_after_local_index(%s, %s, %d)',
            history_mode,
            start_after,
            limit or -1,
        )
        if self._is_closed:
            raise ReplicaIsClosedError()

        query = Query(
            history_mode=history_mode,
            order_by='localIndex ASC',
            start_after={'local_index': start_after},
            limit=limit,
        )
        return await self.replica_driver.query_docs(query)
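    # Usage sketch for get_docs_after_local_index: page through everything
    # written since a previously seen index, e.g. while catching up in a sync
    # session (42 is an arbitrary cursor value):
    #
    #     docs = await replica.get_docs_after_local_index(HistoryMode.ALL, 42, limit=100)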
    async def get_all_docs(self) -> List[Document]:
        """Returns all documents, including historical versions of documents by other identities"""
        logger.debug('get_all_docs()')
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            Query(history_mode=HistoryMode.ALL, order_by='path ASC'),
        )

    async def get_latest_docs(self) -> List[Document]:
        """Returns the latest document from every path"""
        logger.debug('get_latest_docs()')
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            Query(history_mode=HistoryMode.LATEST, order_by='path ASC'),
        )

    async def get_all_docs_at_path(self, path: Path) -> List[Document]:
        """Returns all versions of a document by different authors from a specific path"""
        logger.debug('get_all_docs_at_path("%s")', path)
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(
            Query(
                history_mode=HistoryMode.ALL,
                order_by='path ASC',
                flt={'path': path},
            ),
        )

    async def get_latest_doc_at_path(self, path: Path) -> Optional[Document]:
        """Returns the most recently written version of a document at a path"""
        logger.debug('get_latest_doc_at_path("%s")', path)
        if self._is_closed:
            raise ReplicaIsClosedError()

        docs = await self.replica_driver.query_docs(
            Query(
                history_mode=HistoryMode.LATEST,
                order_by='path ASC',
                flt={'path': path},
            ),
        )
        if not docs:
            return None

        return docs[0]
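    # Usage sketch for get_latest_doc_at_path; assumes Path can be built from
    # its string form:
    #
    #     doc = await replica.get_latest_doc_at_path(Path('/wiki/shared/Bumblebee'))
    #     if doc is not None:
    #         print(doc.content)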
    async def query_docs(self, query: Optional[Query] = None) -> List[Document]:
        """Returns a list of docs matching a given query

        .. code-block:: python

            my_query = Query(
                flt={'path_ends_with': '.txt'},
                limit=5,
            )
            first_five_text_docs = await my_replica.query_docs(my_query)
        """
        logger.debug('query_docs %s', query)
        if self._is_closed:
            raise ReplicaIsClosedError()

        return await self.replica_driver.query_docs(query)
# def query_paths(query: Optional[Query]) -> List[Path]: pass
# def query_authors(query: Optional[Query]) -> List[AuthorAddress]: pass
# --------------------------------------------------
# SET
    async def set(
        self,
        keypair: Identity,
        doc_to_set: IncompleteDocument,
    ) -> IngestEvent:
        """Adds a new document to the replica

        If a document signed by the same identity exists at the same path, it will be overwritten.
        """
        logger.debug('set %s', doc_to_set)
        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug(
            '...deciding timestamp: getting latest doc at the same path (from any author)',
        )

        timestamp: int
        if isinstance(doc_to_set.timestamp, int):
            timestamp = doc_to_set.timestamp
            logger.debug('...docToSet already has a timestamp; not changing it from %d', timestamp)
        else:
            # bump timestamp if needed to win over existing latest doc at same path
            latest_doc_same_path = await self.get_latest_doc_at_path(doc_to_set.path)
            if latest_doc_same_path is None:
                timestamp = microsecond_now()
                logger.debug(
                    '...no existing latest doc, setting timestamp to now() = %s', timestamp
                )
            else:
                timestamp = max(microsecond_now(), latest_doc_same_path.timestamp + 1)
                logger.debug(
                    '...existing latest doc found, bumping timestamp to win if needed = %s',
                    timestamp,
                )

        doc = Document(
            format=ValidationFormat.ES4,
            author=keypair.address,
            content=doc_to_set.content,
            content_hash=await Crypto.sha256base32(doc_to_set.content),
            delete_after=doc_to_set.delete_after or None,
            path=doc_to_set.path,
            timestamp=timestamp,
            workspace=self.share,
            signature='?',  # signature will be added in just a moment
            # _localIndex will be added during upsert. it's not needed for the signature.
        )

        logger.debug('...signing doc')
        try:
            signed_doc = await self.format_validator.sign_document(keypair, doc)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        logger.debug('...signature = %s', signed_doc.signature)
        logger.debug('...ingesting')
        logger.debug('-----------------------')
        ingest_event = await self.ingest(signed_doc)
        logger.debug('-----------------------')
        logger.debug('...done ingesting')
        logger.debug('...set is done.')

        return ingest_event
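    # Usage sketch for set(); the IncompleteDocument fields mirror the ES4
    # document shape used above (path, content, optional timestamp/delete_after):
    #
    #     event = await replica.set(
    #         keypair,
    #         IncompleteDocument(path=Path('/about/displayName.txt'), content='Suzy'),
    #     )
    #     assert event['kind'] == 'success'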
    async def ingest(self, doc_to_ingest: Document) -> IngestEvent:
        """Ingest an existing signed document to the replica"""
        logger.debug('ingest %s', doc_to_ingest)
        if self._is_closed:
            raise ReplicaIsClosedError()

        logger.debug('...removing extra fields')
        try:
            remove_results_or_err = self.format_validator.remove_extra_fields(doc_to_ingest)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        doc_to_ingest = remove_results_or_err.doc  # a copy of doc without extra fields
        extra_fields = remove_results_or_err.extras  # any extra fields starting with underscores
        if extra_fields:
            logger.debug('...extra fields found: %s', J(extra_fields))

        try:
            # now actually check doc validity against core schema
            self.format_validator.check_document_is_valid(doc_to_ingest)
        except EarthsnakeError as exc:
            return {
                'kind': 'failure',
                'reason': 'invalid_document',
                'err': exc,
                'max_local_index': self.replica_driver.get_max_local_index(),
            }

        async def write_to_driver_with_lock() -> IngestEvent:
            # get other docs at the same path
            logger.debug(' >> ingest: start of protected region')
            logger.debug(' > getting other history docs at the same path by any author')
            existing_docs_same_path = await self.get_all_docs_at_path(doc_to_ingest.path)
            logger.debug(' > ...got %d', len(existing_docs_same_path))

            logger.debug(' > getting prevLatest and prevSameAuthor')
            prev_latest: Optional[Document] = (
                existing_docs_same_path[0] if existing_docs_same_path else None
            )
            prev_same_author: Optional[Document] = next(
                (
                    document
                    for document in existing_docs_same_path
                    if document.author == doc_to_ingest.author
                ),
                None,
            )

            logger.debug(' > checking if new doc is latest at this path')
            existing_docs_same_path.append(doc_to_ingest)
            existing_docs_same_path.sort(key=cmp_to_key(doc_compare_newest_first))
            is_latest = existing_docs_same_path[0] == doc_to_ingest
            logger.debug(' > ...isLatest: %s', is_latest)

            if not is_latest and prev_same_author is not None:
                logger.debug(
                    ' > new doc is not latest and there is another one from the same author...'
                )
                # check if this is obsolete or redundant from the same author
                doc_comp = doc_compare_newest_first(doc_to_ingest, prev_same_author)
                if doc_comp == Cmp.GT:
                    logger.debug(' > new doc is GT prevSameAuthor, so it is obsolete')
                    return {
                        'kind': 'nothing_happened',
                        'reason': 'obsolete_from_same_author',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

                if doc_comp == Cmp.EQ:
                    logger.debug(
                        ' > new doc is EQ prevSameAuthor, so it is redundant (already_had_it)',
                    )
                    return {
                        'kind': 'nothing_happened',
                        'reason': 'already_had_it',
                        'doc': doc_to_ingest,
                        'max_local_index': self.replica_driver.get_max_local_index(),
                    }

            # save it
            logger.debug(' > upserting into ReplicaDriver...')
            # TODO: pass existing_docs_same_path to save another lookup
            doc_as_written = await self.replica_driver.upsert(doc_to_ingest)
            logger.debug(' > ...done upserting into ReplicaDriver')
            logger.debug(' > ...getting ReplicaDriver maxLocalIndex...')
            max_local_index = self.replica_driver.get_max_local_index()

            logger.debug(
                ' >> ingest: end of protected region, returning a WriteEvent from the lock'
            )

            return {
                'kind': 'success',
                'max_local_index': max_local_index,
                'doc': doc_as_written,  # with updated extra properties like _localIndex
                'doc_is_latest': is_latest,
                'prev_doc_from_same_author': prev_same_author,
                'prev_latest_doc': prev_latest,
            }

        logger.debug(' >> ingest: running protected region...')
        ingest_event: IngestEvent = await self._ingest_lock.run(
            write_to_driver_with_lock,
        )
        logger.debug(' >> ingest: ...done running protected region')

        logger.debug('...send ingest event after releasing the lock')
        logger.debug('...ingest event: %s', ingest_event)
        await self.bus.send_and_wait(
            f'ingest|{doc_to_ingest.path}',
            ingest_event,
        )  # include the path in the channel even on failures

        return ingest_event
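    # Usage sketch for ingest(): documents arriving from another peer are
    # already signed, so they skip set() and go straight to ingestion:
    #
    #     event = await replica.ingest(incoming_doc)
    #     if event['kind'] == 'failure':
    #         logger.warning('rejected: %s', event['reason'])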
    async def overwrite_all_docs_by_author(self, keypair: Identity) -> Union[int, EarthsnakeError]:
        """Overwrite every document from this author, including history versions, with an empty doc

        :returns: the number of documents changed, or the error that occurred during overwriting
        """
        logger.debug('overwrite_all_docs_by_author("%s")', keypair.address)
        if self._is_closed:
            raise ReplicaIsClosedError()

        # TODO: do this in batches
        query = Query(
            flt={'author': keypair.address},
            history_mode=HistoryMode.ALL,
        )
        docs_to_overwrite = await self.query_docs(query)
        logger.debug('  ...found %d docs to overwrite', len(docs_to_overwrite))

        num_overwritten = 0
        num_already_empty = 0
        for doc in docs_to_overwrite:
            if not doc.content:
                num_already_empty += 1
                continue

            # remove extra fields
            cleaned_result = self.format_validator.remove_extra_fields(doc)
            cleaned_doc = cleaned_result.doc

            # make new doc which is empty and just barely newer than the original
            empty_doc = Document(
                cleaned_doc,
                content='',
                content_hash=await Crypto.sha256base32(''),
                timestamp=doc.timestamp + 1,
                signature='?',
            )

            try:
                # sign and ingest it
                signed_doc = await self.format_validator.sign_document(keypair, empty_doc)
            except EarthsnakeError as exc:
                return exc

            ingest_event = await self.ingest(signed_doc)

            if ingest_event['kind'] == 'failure':
                return ValidationError(
                    'ingestion error during overwrite_all_docs_by_author: '
                    f"{ingest_event['reason']}: {ingest_event['err']}",
                )

            if ingest_event['kind'] == 'nothing_happened':
                return ValidationError(
                    'ingestion did nothing during overwrite_all_docs_by_author: '
                    f"{ingest_event['reason']}",
                )

            # success
            num_overwritten += 1

        logger.debug(
            '  ...done; %d overwritten to be empty; %d were already empty; out of total %d docs',
            num_overwritten,
            num_already_empty,
            len(docs_to_overwrite),
        )

        return num_overwritten
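    # Usage sketch for overwrite_all_docs_by_author; errors come back as
    # values rather than being raised, mirroring the dict-based ingest events:
    #
    #     result = await replica.overwrite_all_docs_by_author(keypair)
    #     if isinstance(result, EarthsnakeError):
    #         logger.error('overwrite failed: %s', result)
    #     else:
    #         logger.info('emptied %d docs', result)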