@@ -1,5 +1,5 @@
 import asyncio
-import json
+from io import StringIO
 from typing import Dict, Optional, List, Any, Mapping, Set, cast, Type, Union
 
 from aleph_message.models import StoreContent, ItemType, Chain, MessageType
@@ -17,6 +17,12 @@
     ContentCurrentlyUnavailable,
 )
 from aleph.schemas.chains.indexer_response import MessageEvent, GenericMessageEvent
+from aleph.schemas.chains.sync_events import (
+    OffChainSyncEventPayload,
+    OnChainSyncEventPayload,
+    OnChainContent,
+    OnChainMessage,
+)
 from aleph.schemas.chains.tezos_indexer_response import (
     MessageEventPayload as TezosMessageEventPayload,
 )
@@ -27,18 +33,6 @@
 from aleph.types.files import FileType
 from aleph.utils import get_sha256
 
-INCOMING_MESSAGE_AUTHORIZED_FIELDS = [
-    "item_hash",
-    "item_content",
-    "item_type",
-    "chain",
-    "channel",
-    "sender",
-    "type",
-    "time",
-    "signature",
-]
-
 
 class ChainDataService:
     def __init__(
@@ -47,52 +41,39 @@ def __init__(
         self.session_factory = session_factory
         self.storage_service = storage_service
 
-    # TODO: split this function in severa
-    async def get_chaindata(
-        self, session: DbSession, messages: List[MessageDb], bulk_threshold: int = 2000
-    ):
-        """Returns content ready to be broadcast on-chain (aka chaindata).
+    async def prepare_sync_event_payload(
+        self, session: DbSession, messages: List[MessageDb]
+    ) -> OffChainSyncEventPayload:
+        """
+        Returns the payload of a sync event to be published on chain.
+
+        We publish message archives on-chain at regular intervals. This function prepares the data
+        before the node emits a transaction on-chain:
+        1. Pack all messages as a JSON file
+        2. Add this file to IPFS and get its CID
+        3. Return the CID + some metadata.
 
-        If message length is over bulk_threshold (default 2000 chars), store list
-        in IPFS and store the object hash instead of raw list.
+        Note that the archive file is pinned on IPFS but not inserted in the `file_pins` table
+        here. This is left to the caller once the event is successfully emitted on chain, to avoid
+        persisting unused archives.
         """
+        # In previous versions, it was envisioned to store messages on-chain. This proved to be
+        # too expensive. The archive uses the same format as these "on-chain" data.
+        archive = OnChainSyncEventPayload(
+            protocol=ChainSyncProtocol.ON_CHAIN_SYNC,
+            version=1,
+            content=OnChainContent(
+                messages=[OnChainMessage.from_orm(message) for message in messages]
+            ),
+        )
+        archive_content = archive.json()
 
-        # TODO: this function is used to guarantee that the chain sync protocol is not broken
-        #       while shifting to Postgres.
-        #       * exclude the useless fields in the DB query directly and get rid of
-        #         INCOMING_MESSAGE_AUTHORIZED_FIELDS
-        #       * use a Pydantic model to enforce the output format
-        def message_to_dict(_message: MessageDb) -> Mapping[str, Any]:
-            message_dict = {
-                k: v
-                for k, v in _message.to_dict().items()
-                if k in INCOMING_MESSAGE_AUTHORIZED_FIELDS
-            }
-            # Convert the time field to epoch
-            message_dict["time"] = message_dict["time"].timestamp()
-            return message_dict
-
-        message_dicts = [message_to_dict(message) for message in messages]
-
-        chaindata = {
-            "protocol": ChainSyncProtocol.ON_CHAIN_SYNC,
-            "version": 1,
-            "content": {"messages": message_dicts},
-        }
-        content = json.dumps(chaindata)
-        if len(content) > bulk_threshold:
-            ipfs_id = await self.storage_service.add_json(
-                session=session, value=chaindata
-            )
-            return json.dumps(
-                {
-                    "protocol": ChainSyncProtocol.OFF_CHAIN_SYNC,
-                    "version": 1,
-                    "content": ipfs_id,
-                }
-            )
-        else:
-            return content
+        ipfs_cid = await self.storage_service.add_file(
+            session=session, fileobject=StringIO(archive_content), engine=ItemType.ipfs
+        )
+        return OffChainSyncEventPayload(
+            protocol=ChainSyncProtocol.OFF_CHAIN_SYNC, version=1, content=ipfs_cid
+        )
 
     @staticmethod
     def _get_sync_messages(tx_content: Mapping[str, Any]):
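
For reference, a minimal caller sketch under the contract stated in the new docstring: prepare the payload, emit it on chain, and only then pin the archive. Only prepare_sync_event_payload comes from this diff; emit_on_chain() and pin_archive() are hypothetical stand-ins for the connector-specific publishing code and the `file_pins` insertion.

# Illustrative sketch only, not part of this diff.
async def emit_on_chain(payload_json: str) -> bool:
    ...  # hypothetical: send the sync-event transaction, return True once confirmed
    return True

async def pin_archive(session, file_hash: str) -> None:
    ...  # hypothetical: record the archive in `file_pins`

async def publish_sync_event(chain_data_service, session, messages) -> None:
    payload = await chain_data_service.prepare_sync_event_payload(
        session=session, messages=messages
    )
    # payload.content is the IPFS CID of the archive, not the messages themselves.
    if await emit_on_chain(payload.json()):
        # Per the docstring, pinning the archive is left to the caller
        # once the event is confirmed on chain.
        await pin_archive(session=session, file_hash=payload.content)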