removing dev branch, many changes

2023-05-29 19:24:57 +00:00
parent 1430f3f656
commit 0a890c8707
250 changed files with 18084 additions and 8040 deletions
@@ -0,0 +1,18 @@
+use super::*;
+
+impl StorageManager {
+    /// Produce the debug listing of the local record store, or the literal
+    /// string "not initialized" when no local record store is present.
+    pub(crate) async fn debug_local_records(&self) -> String {
+        let guard = self.inner.lock().await;
+        match &guard.local_record_store {
+            Some(store) => store.debug_records(),
+            None => "not initialized".to_owned(),
+        }
+    }
+
+    /// Produce the debug listing of the remote record store, or the literal
+    /// string "not initialized" when no remote record store is present.
+    pub(crate) async fn debug_remote_records(&self) -> String {
+        let guard = self.inner.lock().await;
+        match &guard.remote_record_store {
+            Some(store) => store.debug_records(),
+            None => "not initialized".to_owned(),
+        }
+    }
+}
@@ -0,0 +1,191 @@
+use super::*;
+
+/// Shared state accumulated across the fanout of an `outbound_get_value` operation
+struct DoGetValueContext {
+    /// The latest value of the subkey, may be the value passed in
+    pub value: Option<SignedValueData>,
+    /// The number of nodes that have returned the current `value` so far;
+    /// compared against the configured consensus count to decide when to stop
+    pub value_count: usize,
+    /// The descriptor if we got a fresh one or empty if no descriptor was needed
+    pub descriptor: Option<SignedValueDescriptor>,
+    /// The parsed schema from the descriptor if we have one
+    pub schema: Option<DHTSchema>,
+}
+
+impl StorageManager {
+
+    /// Perform a 'get value' query on the network
+    ///
+    /// Fans out `rpc_call_get_value` requests for `key`/`subkey`, starting from
+    /// whatever value and descriptor are supplied in `last_subkey_result`, and
+    /// tracks the newest schema-valid value seen along with how many nodes
+    /// have returned it.
+    ///
+    /// Returns the best value and descriptor collected when the fanout finishes
+    /// with consensus, runs out of nodes, or times out. Returns an error if the
+    /// supplied descriptor has an invalid schema or the fanout itself fails.
+    pub async fn outbound_get_value(
+        &self,
+        rpc_processor: RPCProcessor,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        safety_selection: SafetySelection,
+        last_subkey_result: SubkeyResult,
+    ) -> VeilidAPIResult<SubkeyResult> {
+        let routing_table = rpc_processor.routing_table();
+
+        // Get the DHT parameters for 'GetValue'
+        let (key_count, consensus_count, fanout, timeout_us) = {
+            let c = self.unlocked_inner.config.get();
+            (
+                c.network.dht.max_find_node_count as usize,
+                c.network.dht.get_value_count as usize,
+                c.network.dht.get_value_fanout as usize,
+                TimestampDuration::from(ms_to_us(c.network.dht.get_value_timeout_ms)),
+            )
+        };
+
+        // Make do-get-value answer context
+        let schema = if let Some(d) = &last_subkey_result.descriptor {
+            Some(d.schema()?)
+        } else {
+            None
+        };
+        let context = Arc::new(Mutex::new(DoGetValueContext {
+            value: last_subkey_result.value,
+            value_count: 0,
+            descriptor: last_subkey_result.descriptor.clone(),
+            schema,
+        }));
+
+        // Routine to call to generate fanout
+        let call_routine = |next_node: NodeRef| {
+            let rpc_processor = rpc_processor.clone();
+            let context = context.clone();
+            let last_descriptor = last_subkey_result.descriptor.clone();
+            async move {
+                let vres = rpc_processor
+                    .rpc_call_get_value(
+                        Destination::direct(next_node).with_safety(safety_selection),
+                        key,
+                        subkey,
+                        last_descriptor,
+                    )
+                    .await?;
+                let gva = network_result_value_or_log!(vres => {
+                    // Any other failures, just try the next node
+                    return Ok(None);
+                });
+
+                // Keep the descriptor if we got one. If we had a last_descriptor it will
+                // already be validated by rpc_call_get_value
+                if let Some(descriptor) = gva.answer.descriptor {
+                    let mut ctx = context.lock();
+                    if ctx.descriptor.is_none() && ctx.schema.is_none() {
+                        ctx.schema =
+                            Some(descriptor.schema().map_err(RPCError::invalid_format)?);
+                        ctx.descriptor = Some(descriptor);
+                    }
+                }
+
+                // Keep the value if we got one and it is newer and it passes schema validation
+                if let Some(value) = gva.answer.value {
+                    let mut ctx = context.lock();
+
+                    // Ensure we have a schema and descriptor
+                    let (Some(descriptor), Some(schema)) = (&ctx.descriptor, &ctx.schema) else {
+                        // Got a value but no descriptor for it
+                        // Move to the next node
+                        return Ok(None);
+                    };
+
+                    // Validate with schema
+                    if !schema.check_subkey_value_data(
+                        descriptor.owner(),
+                        subkey,
+                        value.value_data(),
+                    ) {
+                        // Validation failed, ignore this value
+                        // Move to the next node
+                        return Ok(None);
+                    }
+
+                    // If we have a prior value, see if this is a newer sequence number
+                    if let Some(prior_value) = &ctx.value {
+                        let prior_seq = prior_value.value_data().seq();
+                        let new_seq = value.value_data().seq();
+
+                        if new_seq == prior_seq {
+                            // If sequence number is the same, the data should be the same
+                            if prior_value.value_data() != value.value_data() {
+                                // Move to the next node
+                                return Ok(None);
+                            }
+                            // Increase the consensus count for the existing value
+                            ctx.value_count += 1;
+                        } else if new_seq > prior_seq {
+                            // If the sequence number is greater, start over with the new value
+                            ctx.value = Some(value);
+                            // One node has shown us this value so far
+                            ctx.value_count = 1;
+                        } else {
+                            // If the sequence number is older, ignore it
+                        }
+                    } else {
+                        // No prior value at all: keep the first validated value we
+                        // receive, otherwise a query that starts with nothing could
+                        // never return a value
+                        ctx.value = Some(value);
+                        // One node has shown us this value so far
+                        ctx.value_count = 1;
+                    }
+                }
+
+                // Return peers if we have some
+                Ok(Some(gva.answer.peers))
+            }
+        };
+
+        // Routine to call to check if we're done at each step
+        let check_done = |_closest_nodes: &[NodeRef]| {
+            // If we have reached sufficient consensus, return done
+            let ctx = context.lock();
+            if ctx.value.is_some() && ctx.descriptor.is_some() && ctx.value_count >= consensus_count {
+                return Some(());
+            }
+            None
+        };
+
+        // Call the fanout
+        let fanout_call = FanoutCall::new(
+            routing_table.clone(),
+            key,
+            key_count,
+            fanout,
+            timeout_us,
+            call_routine,
+            check_done,
+        );
+
+        match fanout_call.run().await {
+            // If we don't finish in the timeout (too much time passed checking for consensus)
+            TimeoutOr::Timeout |
+            // If we finished with consensus (enough nodes returning the same value)
+            TimeoutOr::Value(Ok(Some(()))) |
+            // If we finished without consensus (ran out of nodes before getting consensus)
+            TimeoutOr::Value(Ok(None)) => {
+                // Return the best answer we've got
+                let ctx = context.lock();
+                Ok(SubkeyResult{
+                    value: ctx.value.clone(),
+                    descriptor: ctx.descriptor.clone(),
+                })
+            }
+            // Failed
+            TimeoutOr::Value(Err(e)) => {
+                // If we finished with an error, return that
+                Err(e.into())
+            }
+        }
+    }
+
+    /// Handle a received 'Get Value' query
+    ///
+    /// Internal errors from the remote-value lookup are propagated as API
+    /// errors; any other error is reported back as an invalid-message
+    /// network result.
+    pub async fn inbound_get_value(
+        &self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        want_descriptor: bool,
+    ) -> VeilidAPIResult<NetworkResult<SubkeyResult>> {
+        let mut inner = self.lock().await?;
+        match inner.handle_get_remote_value(key, subkey, want_descriptor).await {
+            Ok(subkey_result) => Ok(NetworkResult::value(subkey_result)),
+            Err(VeilidAPIError::Internal { message }) => {
+                apibail_internal!(message);
+            }
+            Err(other) => Ok(NetworkResult::invalid_message(other)),
+        }
+    }
+}
@@ -0,0 +1,63 @@
+use super::*;
+
+/// Table key identifying a whole record by its typed key
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct RecordTableKey {
+    pub key: TypedKey,
+}
+impl RecordTableKey {
+    /// Serialize as the 4-byte key kind followed by the key value bytes
+    pub fn bytes(&self) -> [u8; PUBLIC_KEY_LENGTH + 4] {
+        let mut out = [0u8; PUBLIC_KEY_LENGTH + 4];
+        let (kind_part, value_part) = out.split_at_mut(4);
+        kind_part.copy_from_slice(&self.key.kind.0);
+        value_part.copy_from_slice(&self.key.value.bytes);
+        out
+    }
+}
+
+impl TryFrom<&[u8]> for RecordTableKey {
+    type Error = EyreReport;
+    /// Parse the layout written by `RecordTableKey::bytes()`:
+    /// 4-byte key kind followed by the key value bytes
+    fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
+        if bytes.len() != PUBLIC_KEY_LENGTH + 4 {
+            bail!("invalid bytes length");
+        }
+        let (kind_bytes, value_bytes) = bytes.split_at(4);
+        let kind = FourCC::try_from(kind_bytes).wrap_err("invalid kind")?;
+        let value = PublicKey::try_from(value_bytes).wrap_err("invalid value")?;
+        Ok(RecordTableKey {
+            key: TypedKey::new(kind, value),
+        })
+    }
+}
+
+/// Table key identifying a single subkey of a record
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct SubkeyTableKey {
+    pub key: TypedKey,
+    pub subkey: ValueSubkey,
+}
+impl SubkeyTableKey {
+    /// Serialize as the 4-byte key kind, the key value bytes, then the
+    /// subkey number in little-endian order
+    pub fn bytes(&self) -> [u8; PUBLIC_KEY_LENGTH + 4 + 4] {
+        let mut out = [0u8; PUBLIC_KEY_LENGTH + 4 + 4];
+        let (kind_part, rest) = out.split_at_mut(4);
+        let (value_part, subkey_part) = rest.split_at_mut(PUBLIC_KEY_LENGTH);
+        kind_part.copy_from_slice(&self.key.kind.0);
+        value_part.copy_from_slice(&self.key.value.bytes);
+        subkey_part.copy_from_slice(&self.subkey.to_le_bytes());
+        out
+    }
+}
+impl TryFrom<&[u8]> for SubkeyTableKey {
+    type Error = EyreReport;
+    /// Parse the layout written by `SubkeyTableKey::bytes()`:
+    /// 4-byte key kind, the key value bytes, then a 4-byte little-endian subkey.
+    ///
+    /// Fails if the input is not exactly `PUBLIC_KEY_LENGTH + 4 + 4` bytes long
+    /// or if the kind or key value cannot be parsed.
+    fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
+        // The length must include the trailing subkey bytes; checking only the
+        // record-key length would reject valid keys and let short input reach
+        // the subkey slice below, which would panic
+        if bytes.len() != PUBLIC_KEY_LENGTH + 4 + 4 {
+            bail!("invalid bytes length");
+        }
+        let kind = FourCC::try_from(&bytes[0..4]).wrap_err("invalid kind")?;
+        let value =
+            PublicKey::try_from(&bytes[4..PUBLIC_KEY_LENGTH + 4]).wrap_err("invalid value")?;
+        let subkey = ValueSubkey::from_le_bytes(
+            bytes[PUBLIC_KEY_LENGTH + 4..PUBLIC_KEY_LENGTH + 4 + 4]
+                .try_into()
+                .wrap_err("invalid subkey")?,
+        );
+
+        let key = TypedKey::new(kind, value);
+        Ok(SubkeyTableKey { key, subkey })
+    }
+}
@@ -0,0 +1,411 @@
+mod debug;
+mod get_value;
+mod keys;
+mod record_store;
+mod record_store_limits;
+mod set_value;
+mod storage_manager_inner;
+mod tasks;
+mod types;
+
+use keys::*;
+use record_store::*;
+use record_store_limits::*;
+use storage_manager_inner::*;
+
+pub use types::*;
+
+use super::*;
+use crate::rpc_processor::*;
+
+/// The maximum size of a single subkey (taken from `ValueData::MAX_LEN`)
+const MAX_SUBKEY_SIZE: usize = ValueData::MAX_LEN;
+/// The maximum total size of all subkeys of a record (1_048_576 = 2^20)
+const MAX_RECORD_DATA_SIZE: usize = 1_048_576;
+/// Interval, in seconds, between flushes of the record stores to disk
+const FLUSH_RECORD_STORES_INTERVAL_SECS: u32 = 1;
+
+/// State of the storage manager that is reached through the shared
+/// `unlocked_inner` handle without taking the `inner` async mutex
+struct StorageManagerUnlockedInner {
+    config: VeilidConfig,
+    crypto: Crypto,
+    protected_store: ProtectedStore,
+    table_store: TableStore,
+    block_store: BlockStore,
+
+    // Background processes
+    // Tick task created with FLUSH_RECORD_STORES_INTERVAL_SECS as its interval
+    flush_record_stores_task: TickTask<EyreReport>,
+}
+
+/// Handle to the storage manager. Cloning produces another handle to the
+/// same state, since both fields are `Arc`s.
+#[derive(Clone)]
+pub struct StorageManager {
+    /// State reachable without locking
+    unlocked_inner: Arc<StorageManagerUnlockedInner>,
+    /// Mutable state guarded by an async mutex
+    inner: Arc<AsyncMutex<StorageManagerInner>>,
+}
+
+impl StorageManager {
+    /// Assemble the lock-free portion of the storage manager state, including
+    /// the record store flush task.
+    fn new_unlocked_inner(
+        config: VeilidConfig,
+        crypto: Crypto,
+        protected_store: ProtectedStore,
+        table_store: TableStore,
+        block_store: BlockStore,
+    ) -> StorageManagerUnlockedInner {
+        let flush_record_stores_task = TickTask::new(FLUSH_RECORD_STORES_INTERVAL_SECS);
+        StorageManagerUnlockedInner {
+            config,
+            crypto,
+            protected_store,
+            table_store,
+            block_store,
+            flush_record_stores_task,
+        }
+    }
+    /// Build a fresh `StorageManagerInner` bound to the given unlocked state.
+    /// Used both at construction and to reset the state in `terminate`.
+    fn new_inner(unlocked_inner: Arc<StorageManagerUnlockedInner>) -> StorageManagerInner {
+        StorageManagerInner::new(unlocked_inner)
+    }
+
+    /// Construct a storage manager from its component stores and set up its
+    /// background tasks. The manager still has to be started with `init`.
+    pub fn new(
+        config: VeilidConfig,
+        crypto: Crypto,
+        protected_store: ProtectedStore,
+        table_store: TableStore,
+        block_store: BlockStore,
+    ) -> StorageManager {
+        let unlocked_inner = Arc::new(Self::new_unlocked_inner(
+            config,
+            crypto,
+            protected_store,
+            table_store,
+            block_store,
+        ));
+        let inner = Arc::new(AsyncMutex::new(Self::new_inner(unlocked_inner.clone())));
+
+        let manager = StorageManager {
+            unlocked_inner,
+            inner,
+        };
+        manager.setup_tasks();
+        manager
+    }
+
+    /// Start up the storage manager by initializing its inner state.
+    ///
+    /// Takes the inner lock for the duration of the inner `init` call and
+    /// passes a clone of this handle to it. Any error from the inner `init`
+    /// is propagated.
+    #[instrument(level = "debug", skip_all, err)]
+    pub async fn init(&self) -> EyreResult<()> {
+        debug!("startup storage manager");
+
+        let mut inner = self.inner.lock().await;
+        inner.init(self.clone()).await?;
+
+        Ok(())
+    }
+
+    /// Shut down the storage manager.
+    ///
+    /// Terminates the inner state, cancels the background tasks, and then
+    /// replaces the inner state with a freshly constructed one.
+    ///
+    /// NOTE(review): the inner lock is held while the tasks are cancelled;
+    /// confirm that no task needs this lock in order to finish cancelling.
+    pub async fn terminate(&self) {
+        debug!("starting storage manager shutdown");
+
+        let mut inner = self.inner.lock().await;
+        inner.terminate().await;
+
+        // Cancel all tasks
+        self.cancel_tasks().await;
+
+        // Release the storage manager
+        *inner = Self::new_inner(self.unlocked_inner.clone());
+
+        debug!("finished storage manager shutdown");
+    }
+
+    /// Install or clear the RPC processor used for network operations.
+    /// While it is `None`, operations that need the network treat the
+    /// manager as offline.
+    pub async fn set_rpc_processor(&self, opt_rpc_processor: Option<RPCProcessor>) {
+        self.inner.lock().await.rpc_processor = opt_rpc_processor;
+    }
+
+    /// Acquire the inner state lock, failing with a 'not initialized' API
+    /// error if the inner state has not been initialized.
+    async fn lock(&self) -> VeilidAPIResult<AsyncMutexGuardArc<StorageManagerInner>> {
+        let guard = asyncmutex_lock_arc!(&self.inner);
+        if guard.initialized {
+            Ok(guard)
+        } else {
+            apibail_not_initialized!();
+        }
+    }
+
+    /// Create a brand new local record owned by a freshly generated key, open
+    /// it with the owner as writer, and return the opened descriptor
+    pub async fn create_record(
+        &self,
+        kind: CryptoKind,
+        schema: DHTSchema,
+        safety_selection: SafetySelection,
+    ) -> VeilidAPIResult<DHTRecordDescriptor> {
+        let mut inner = self.lock().await?;
+
+        // Make the new owned local record
+        let (key, owner) = inner
+            .create_new_owned_local_record(kind, schema, safety_selection)
+            .await?;
+
+        // The record was just created, so opening it as an existing record is
+        // expected to find it. The owner is the initial writer.
+        let opened = inner.open_existing_record(key, Some(owner), safety_selection)?;
+        Ok(opened.unwrap())
+    }
+
+    /// Open a record and return its opened descriptor.
+    /// If the record already exists locally it is opened directly; otherwise
+    /// subkey 0 is fetched from the network and a new local record is opened
+    /// from the result.
+    pub async fn open_record(
+        &self,
+        key: TypedKey,
+        writer: Option<KeyPair>,
+        safety_selection: SafetySelection,
+    ) -> VeilidAPIResult<DHTRecordDescriptor> {
+        // Hold the lock only for the local lookup so the network access below
+        // does not block other users of the storage manager
+        let rpc_processor = {
+            let mut inner = self.lock().await?;
+
+            // A local record already exists, nothing more to do
+            if let Some(res) = inner.open_existing_record(key, writer, safety_selection)? {
+                return Ok(res);
+            }
+
+            // No local record, so the network is required
+            let Some(rpc_processor) = inner.rpc_processor.clone() else {
+                // Offline, try again later
+                apibail_try_again!();
+            };
+            rpc_processor
+        };
+
+        // Start from no descriptor and no value, using the safety selection
+        // the record is being opened with
+        let subkey: ValueSubkey = 0;
+        let subkey_result = self
+            .outbound_get_value(
+                rpc_processor,
+                key,
+                subkey,
+                safety_selection,
+                SubkeyResult::default(),
+            )
+            .await?;
+
+        // Neither a value nor a descriptor means the key was not found
+        if subkey_result.value.is_none() && subkey_result.descriptor.is_none() {
+            apibail_key_not_found!(key);
+        };
+
+        // Take the lock again to store what we just fetched and open the record
+        let mut inner = self.lock().await?;
+        inner
+            .open_new_record(key, writer, subkey, subkey_result, safety_selection)
+            .await
+    }
+
+    /// Close a previously opened local record
+    pub async fn close_record(&self, key: TypedKey) -> VeilidAPIResult<()> {
+        self.lock().await?.close_record(key)
+    }
+
+    /// Delete a local record, closing it first if it is currently open
+    pub async fn delete_record(&self, key: TypedKey) -> VeilidAPIResult<()> {
+        let mut inner = self.lock().await?;
+
+        // An open record must be closed before it can be removed
+        if inner.opened_records.contains_key(&key) {
+            inner.close_record(key)?;
+        }
+
+        // Remove the record from the local store
+        match inner.local_record_store.as_mut() {
+            Some(store) => store.delete_record(key).await,
+            None => {
+                apibail_not_initialized!();
+            }
+        }
+    }
+
+    /// Get the value of a subkey from an opened local record
+    /// may refresh the record, and will if it is forced to or the subkey is not available locally yet
+    /// Returns Ok(None) if no value was found
+    /// Returns Ok(Some(value)) if a value was found online or locally
+    /// Fails with a generic error if the record is not open, and with a
+    /// 'try again' error if a refresh is needed while offline.
+    /// The record remains open after this call.
+    pub async fn get_value(
+        &self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        force_refresh: bool,
+    ) -> VeilidAPIResult<Option<ValueData>> {
+        let mut inner = self.lock().await?;
+
+        // Look up (rather than remove) the opened record so it stays open after
+        // this call; only its safety selection is needed below
+        let safety_selection = {
+            let Some(opened_record) = inner.opened_records.get(&key) else {
+                apibail_generic!("record not open");
+            };
+            opened_record.safety_selection()
+        };
+
+        // See if the requested subkey is our local record store
+        let last_subkey_result = inner.handle_get_local_value(key, subkey, true).await?;
+
+        // Return the existing value if we have one unless we are forcing a refresh
+        if !force_refresh {
+            if let Some(last_subkey_result_value) = last_subkey_result.value {
+                return Ok(Some(last_subkey_result_value.into_value_data()));
+            }
+        }
+
+        // Refresh if we can
+
+        // Get rpc processor and drop mutex so we don't block while getting the value from the network
+        let Some(rpc_processor) = inner.rpc_processor.clone() else {
+            // Offline, try again later
+            apibail_try_again!();
+        };
+
+        // Drop the lock for network access
+        drop(inner);
+
+        // May have last descriptor / value
+        // Use the safety selection we opened the record with
+        let opt_last_seq = last_subkey_result
+            .value
+            .as_ref()
+            .map(|v| v.value_data().seq());
+        let subkey_result = self
+            .outbound_get_value(
+                rpc_processor,
+                key,
+                subkey,
+                safety_selection,
+                last_subkey_result,
+            )
+            .await?;
+
+        // See if we got a value back
+        let Some(subkey_result_value) = subkey_result.value else {
+            // If we got nothing back then we also had nothing beforehand, return nothing
+            return Ok(None);
+        };
+
+        // If we got a new value back then write it to the opened record
+        if Some(subkey_result_value.value_data().seq()) != opt_last_seq {
+            let mut inner = self.lock().await?;
+            inner
+                .handle_set_local_value(key, subkey, subkey_result_value.clone())
+                .await?;
+        }
+        Ok(Some(subkey_result_value.into_value_data()))
+    }
+
+    /// Set the value of a subkey on an opened local record
+    /// Puts changes to the network immediately and may refresh the record if there is a newer subkey available online
+    /// Returns Ok(Some(value)) with the value that ended up being stored: the
+    /// value just written, or whatever value the network set operation returned
+    /// Fails with a generic error if the cryptosystem is unsupported, the record
+    /// is not open or has no writer, there is no descriptor, or schema validation fails
+    /// The record remains open after this call.
+    ///
+    /// NOTE(review): earlier documentation promised Ok(None) when the value was
+    /// set, but this implementation always returns Ok(Some(..)); confirm which
+    /// contract callers expect.
+    pub async fn set_value(
+        &self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        data: Vec<u8>,
+    ) -> VeilidAPIResult<Option<ValueData>> {
+        let mut inner = self.lock().await?;
+
+        // Get cryptosystem
+        let Some(vcrypto) = self.unlocked_inner.crypto.get(key.kind) else {
+            apibail_generic!("unsupported cryptosystem");
+        };
+
+        // Look up (rather than remove) the opened record so it stays open after
+        // this call, including on every error path below
+        let (safety_selection, writer) = {
+            let Some(opened_record) = inner.opened_records.get(&key) else {
+                apibail_generic!("record not open");
+            };
+
+            // If we don't have a writer then we can't write
+            let Some(writer) = opened_record.writer().cloned() else {
+                apibail_generic!("value is not writable");
+            };
+
+            (opened_record.safety_selection(), writer)
+        };
+
+        // See if the subkey we are modifying has a last known local value
+        let last_subkey_result = inner.handle_get_local_value(key, subkey, true).await?;
+
+        // Get the descriptor and schema for the key
+        let Some(descriptor) = last_subkey_result.descriptor else {
+            apibail_generic!("must have a descriptor");
+        };
+        let schema = descriptor.schema()?;
+
+        // Make new subkey data
+        let value_data = if let Some(signed_value_data) = last_subkey_result.value {
+            let seq = signed_value_data.value_data().seq();
+            ValueData::new_with_seq(seq + 1, data, writer.key)
+        } else {
+            ValueData::new(data, writer.key)
+        };
+        let seq = value_data.seq();
+
+        // Validate with schema
+        if !schema.check_subkey_value_data(descriptor.owner(), subkey, &value_data) {
+            // Validation failed, ignore this value
+            apibail_generic!("failed schema validation");
+        }
+
+        // Sign the new value data with the writer
+        let signed_value_data = SignedValueData::make_signature(
+            value_data,
+            descriptor.owner(),
+            subkey,
+            vcrypto,
+            writer.secret,
+        )?;
+
+        // Get rpc processor and drop mutex so we don't block while getting the value from the network
+        let Some(rpc_processor) = inner.rpc_processor.clone() else {
+            // Offline, just write it locally and return immediately
+            inner
+                .handle_set_local_value(key, subkey, signed_value_data.clone())
+                .await?;
+
+            // Add to offline writes to flush
+            inner
+                .offline_subkey_writes
+                .entry(key)
+                .and_modify(|x| {
+                    x.insert(subkey);
+                })
+                .or_insert(ValueSubkeyRangeSet::single(subkey));
+            return Ok(Some(signed_value_data.into_value_data()));
+        };
+
+        // Drop the lock for network access
+        drop(inner);
+
+        // Use the safety selection we opened the record with
+        let final_signed_value_data = self
+            .outbound_set_value(
+                rpc_processor,
+                key,
+                subkey,
+                safety_selection,
+                signed_value_data,
+                descriptor,
+            )
+            .await?;
+
+        // If we got a new value back then write it to the opened record
+        // NOTE(review): when the returned sequence number equals ours, nothing
+        // in this function stores the value locally; confirm outbound_set_value
+        // takes care of that.
+        if final_signed_value_data.value_data().seq() != seq {
+            let mut inner = self.lock().await?;
+            inner
+                .handle_set_local_value(key, subkey, final_signed_value_data.clone())
+                .await?;
+        }
+        Ok(Some(final_signed_value_data.into_value_data()))
+    }
+
+    /// Watch subkeys of a record for changes.
+    /// Not implemented yet: after acquiring the lock this panics via
+    /// `unimplemented!()`.
+    pub async fn watch_values(
+        &self,
+        key: TypedKey,
+        subkeys: ValueSubkeyRangeSet,
+        expiration: Timestamp,
+        count: u32,
+    ) -> VeilidAPIResult<Timestamp> {
+        let _inner = self.lock().await?;
+        unimplemented!()
+    }
+
+    /// Cancel a watch on subkeys of a record.
+    /// Not implemented yet: after acquiring the lock this panics via
+    /// `unimplemented!()`.
+    pub async fn cancel_watch_values(
+        &self,
+        key: TypedKey,
+        subkeys: ValueSubkeyRangeSet,
+    ) -> VeilidAPIResult<bool> {
+        let _inner = self.lock().await?;
+        unimplemented!()
+    }
+}
@@ -0,0 +1,548 @@
+//! RecordStore
+//! Keeps an LRU cache of DHT keys and their associated subkey value data.
+//! Instances of this store are used for 'local' (persistent) and 'remote' (ephemeral) DHT key storage.
+//! This store does not perform any validation on the schema, and all ValueRecordData passed in must have been previously validated.
+//! Uses an in-memory store for the records, backed by the TableStore. Subkey data is LRU cached and rotated out by a limits policy,
+//! and backed to the TableStore for persistence.
+use super::*;
+use hashlink::LruCache;
+
+/// An LRU-indexed store of records of detail type `D` and their subkey data,
+/// persisted through two `TableDB` tables opened from `table_store`.
+pub struct RecordStore<D>
+where
+    D: Clone + RkyvArchive + RkyvSerialize<DefaultVeilidRkyvSerializer>,
+    for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
+    <D as RkyvArchive>::Archived: RkyvDeserialize<D, VeilidSharedDeserializeMap>,
+{
+    /// Table store the record and subkey tables are opened from
+    table_store: TableStore,
+    /// Prefix for the table names (`{name}_records` and `{name}_subkeys`)
+    name: String,
+    /// Size and count limits enforced by this store
+    limits: RecordStoreLimits,
+
+    /// Record table, `None` until `init()` has opened it
+    record_table: Option<TableDB>,
+    /// Subkey table, `None` until `init()` has opened it
+    subkey_table: Option<TableDB>,
+    /// In-memory LRU index of the records
+    record_index: LruCache<RecordTableKey, Record<D>>,
+    /// In-memory LRU cache of subkey data
+    subkey_cache: LruCache<SubkeyTableKey, RecordData>,
+    /// Running sum of `total_size()` over the entries currently in `subkey_cache`
+    subkey_cache_total_size: usize,
+    /// Running total of the space accounted to records (key size plus record `total_size()`)
+    total_storage_space: usize,
+
+    /// Records removed from the index whose stored data still has to be deleted
+    dead_records: Vec<(RecordTableKey, Record<D>)>,
+    /// Keys of records whose index entry must be written back to the record table
+    changed_records: HashSet<RecordTableKey>,
+
+    /// Taken by `purge_dead_records` for the duration of a purge
+    purge_dead_records_mutex: Arc<AsyncMutex<()>>,
+}
+
+/// The result of a subkey lookup (for example `RecordStore::get_subkey` or the do_get_value operation).
+/// Either field may be empty independently of the other.
+#[derive(Default, Debug)]
+pub struct SubkeyResult {
+    /// The subkey value if we got one
+    pub value: Option<SignedValueData>,
+    /// The descriptor if we got a fresh one or empty if no descriptor was needed
+    pub descriptor: Option<SignedValueDescriptor>,
+}
+
+impl<D> RecordStore<D>
+where
+    D: Clone + RkyvArchive + RkyvSerialize<DefaultVeilidRkyvSerializer>,
+    for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
+    <D as RkyvArchive>::Archived: RkyvDeserialize<D, VeilidSharedDeserializeMap>,
+{
+    /// Create a record store called `name` on top of `table_store`.
+    ///
+    /// No tables are opened here; call `init()` before using the store.
+    /// The record index is sized from `limits.max_records` (`usize::MAX` when `None`)
+    /// and the subkey cache from `limits.subkey_cache_size`.
+    pub fn new(table_store: TableStore, name: &str, limits: RecordStoreLimits) -> Self {
+        let record_index = LruCache::new(limits.max_records.unwrap_or(usize::MAX));
+        let subkey_cache = LruCache::new(limits.subkey_cache_size as usize);
+
+        Self {
+            table_store,
+            name: name.to_owned(),
+            limits,
+            record_table: None,
+            subkey_table: None,
+            record_index,
+            subkey_cache,
+            subkey_cache_total_size: 0,
+            total_storage_space: 0,
+            dead_records: Vec::new(),
+            changed_records: HashSet::new(),
+            purge_dead_records_mutex: Arc::new(AsyncMutex::new(())),
+        }
+    }
+
+    /// Open the backing tables and rebuild the in-memory record index from the record table.
+    ///
+    /// Saved records are inserted in ascending `last_touched` order. Any record pushed
+    /// out of the index during insertion, or found to be a duplicate, is queued as a
+    /// dead record. Returns an error if a table cannot be opened or read, or if a stored
+    /// key cannot be converted to a `RecordTableKey`.
+    pub async fn init(&mut self) -> EyreResult<()> {
+        let record_table_name = format!("{}_records", self.name);
+        let record_table = self.table_store.open(&record_table_name, 1).await?;
+        let subkey_table_name = format!("{}_subkeys", self.name);
+        let subkey_table = self.table_store.open(&subkey_table_name, 1).await?;
+
+        // Load every saved record first so they can be ordered before indexing
+        let saved_keys = record_table.get_keys(0).await?;
+        let mut saved_records: Vec<(RecordTableKey, Record<D>)> =
+            Vec::with_capacity(saved_keys.len());
+        for raw_key in saved_keys {
+            let Some(record) = record_table.load_rkyv::<Record<D>>(0, &raw_key).await? else {
+                continue;
+            };
+            let rtk = RecordTableKey::try_from(raw_key.as_ref())?;
+            saved_records.push((rtk, record));
+        }
+
+        // Order by last touched time and insert in that order
+        saved_records.sort_by(|a, b| a.1.last_touched().cmp(&b.1.last_touched()));
+
+        let mut dead_records = Vec::new();
+        for (rtk, record) in saved_records {
+            // Account for the storage space of everything that was loaded
+            self.total_storage_space += mem::size_of::<RecordTableKey>();
+            self.total_storage_space += record.total_size();
+
+            // If the configuration changed, only 'limits.max_records' records are kept;
+            // whatever the index pushes out becomes a dead record
+            let duplicate = self.record_index.insert(rtk, record, |k, v| {
+                dead_records.push((k, v));
+            });
+            if let Some(old_record) = duplicate {
+                // This shouldn't happen, but deduplicate anyway
+                log_stor!(warn "duplicate record in table: {:?}", rtk);
+                dead_records.push((rtk, old_record));
+            }
+        }
+        self.dead_records.extend(dead_records);
+
+        self.record_table = Some(record_table);
+        self.subkey_table = Some(subkey_table);
+        Ok(())
+    }
+
+    /// Queue a record that has left the index so that `purge_dead_records`
+    /// can delete its stored record and subkey data.
+    fn add_dead_record(&mut self, key: RecordTableKey, record: Record<D>) {
+        self.dead_records.push((key, record));
+    }
+
+    /// Remember that the record at `key` changed so that `flush_changed_records`
+    /// writes it back to the record table.
+    fn mark_record_changed(&mut self, key: RecordTableKey) {
+        self.changed_records.insert(key);
+    }
+
+    /// Insert `record_data` into the subkey cache under `key` and keep
+    /// `subkey_cache_total_size` in step with what entered and left the cache.
+    ///
+    /// If `limits.max_subkey_cache_memory_mb` is set, least-recently-used entries are
+    /// then removed until the cache is back under that limit.
+    fn add_to_subkey_cache(&mut self, key: SubkeyTableKey, record_data: RecordData) {
+        let incoming_size = record_data.total_size();
+
+        // Size of everything that left the cache because of this insert
+        let mut evicted_size = 0usize;
+        let replaced = self.subkey_cache.insert(key, record_data, |_, lru_out| {
+            // LRU out
+            evicted_size += lru_out.total_size();
+        });
+        if let Some(previous) = replaced {
+            // Old data under the same key
+            evicted_size += previous.total_size();
+        }
+        self.subkey_cache_total_size -= evicted_size;
+        self.subkey_cache_total_size += incoming_size;
+
+        // Purge over size limit
+        let Some(max_subkey_cache_memory_mb) = self.limits.max_subkey_cache_memory_mb else {
+            return;
+        };
+        let max_bytes = max_subkey_cache_memory_mb * 1_048_576usize;
+        while self.subkey_cache_total_size > max_bytes {
+            let Some((_, lru_out)) = self.subkey_cache.remove_lru() else {
+                break;
+            };
+            self.subkey_cache_total_size -= lru_out.total_size();
+        }
+    }
+
+    /// Drop `key` from the subkey cache, if present, and subtract its size
+    /// from `subkey_cache_total_size`.
+    fn remove_from_subkey_cache(&mut self, key: SubkeyTableKey) {
+        let Some(removed) = self.subkey_cache.remove(&key) else {
+            return;
+        };
+        self.subkey_cache_total_size -= removed.total_size();
+    }
+
+    /// Delete the stored data of every queued dead record.
+    ///
+    /// When `lazy` is true and the purge mutex is already held, this returns without
+    /// doing anything; otherwise it waits for the mutex. For each dead record the
+    /// record row and every subkey row in `0..subkey_count` are deleted through table
+    /// transactions, the subkeys are dropped from the subkey cache, and the record's
+    /// accounted size is subtracted from `total_storage_space`.
+    ///
+    /// Commit failures are logged, not returned. Panics if there are dead records
+    /// but the tables have not been opened by `init()`.
+    async fn purge_dead_records(&mut self, lazy: bool) {
+        let purge_dead_records_mutex = self.purge_dead_records_mutex.clone();
+        let _lock = if lazy {
+            match asyncmutex_try_lock!(purge_dead_records_mutex) {
+                Some(v) => v,
+                None => {
+                    // If not ready now, just skip it if we're lazy
+                    return;
+                }
+            }
+        } else {
+            // Not lazy, must wait
+            purge_dead_records_mutex.lock().await
+        };
+
+        // Delete dead keys
+        if self.dead_records.is_empty() {
+            return;
+        }
+
+        let record_table = self.record_table.clone().unwrap();
+        let subkey_table = self.subkey_table.clone().unwrap();
+
+        let rt_xact = record_table.transact();
+        let st_xact = subkey_table.transact();
+        // Take the whole queue; it is left empty for records that die from here on
+        let dead_records = mem::take(&mut self.dead_records);
+        for (k, v) in dead_records {
+            // Record should already be gone from index
+            if self.record_index.contains_key(&k) {
+                log_stor!(error "dead record found in index: {:?}", k);
+            }
+
+            // Delete record
+            rt_xact.delete(0, &k.bytes());
+
+            // Delete subkeys
+            let subkey_count = v.subkey_count() as u32;
+            for sk in 0..subkey_count {
+                // From table
+                let stk = SubkeyTableKey {
+                    key: k.key,
+                    subkey: sk,
+                };
+                st_xact.delete(0, &stk.bytes());
+
+                // From cache
+                self.remove_from_subkey_cache(stk);
+            }
+
+            // Remove from total size
+            self.total_storage_space -= mem::size_of::<RecordTableKey>();
+            self.total_storage_space -= v.total_size();
+        }
+        // The two tables are committed independently; a failure of one is only logged
+        if let Err(e) = rt_xact.commit().await {
+            log_stor!(error "failed to commit record table transaction: {}", e);
+        }
+        if let Err(e) = st_xact.commit().await {
+            log_stor!(error "failed to commit subkey table transaction: {}", e);
+        }
+    }
+
+    /// Write every record marked as changed back to the record table in one transaction.
+    ///
+    /// Keys that are no longer in the index are skipped. Store and commit failures
+    /// are logged, not returned. Panics if there are changed records but the record
+    /// table has not been opened by `init()`.
+    async fn flush_changed_records(&mut self) {
+        if self.changed_records.is_empty() {
+            return;
+        }
+
+        let record_table = self.record_table.clone().unwrap();
+        let rt_xact = record_table.transact();
+
+        // Drain the change set; anything marked from here on waits for the next flush
+        for rtk in mem::take(&mut self.changed_records) {
+            // `peek` reads the record without treating it as a use of the entry
+            let Some(record) = self.record_index.peek(&rtk) else {
+                continue;
+            };
+            if let Err(e) = rt_xact.store_rkyv(0, &rtk.bytes(), record) {
+                log_stor!(error "failed to save record: {}", e);
+            }
+        }
+
+        if let Err(e) = rt_xact.commit().await {
+            log_stor!(error "failed to commit record table transaction: {}", e);
+        }
+    }
+
+    /// Periodic maintenance: flush changed records to the record table, then lazily
+    /// purge dead records (skipped if a purge is already in progress).
+    /// Always returns `Ok(())`; failures inside either step are only logged.
+    pub async fn tick(&mut self) -> EyreResult<()> {
+        self.flush_changed_records().await;
+        self.purge_dead_records(true).await;
+        Ok(())
+    }
+
+    /// Add a brand new record under `key`, persisting it to the record table and the index.
+    ///
+    /// Fails with an internal error if the record already exists or the store is not
+    /// initialized, and with a try-again error if `limits.max_storage_space_mb` would be
+    /// exceeded. Records pushed out of the index by the insert are queued as dead records.
+    pub async fn new_record(&mut self, key: TypedKey, record: Record<D>) -> VeilidAPIResult<()> {
+        let rtk = RecordTableKey { key };
+        if self.record_index.contains_key(&rtk) {
+            apibail_internal!("record already exists");
+        }
+
+        let Some(record_table) = self.record_table.clone() else {
+            apibail_internal!("record store not initialized");
+        };
+
+        // If over size limit, dont create record
+        let new_total_storage_space =
+            self.total_storage_space + mem::size_of::<RecordTableKey>() + record.total_size();
+        let over_limit = self
+            .limits
+            .max_storage_space_mb
+            .map_or(false, |max_mb| new_total_storage_space > (max_mb * 1_048_576usize));
+        if over_limit {
+            apibail_try_again!();
+        }
+
+        // Persist first, so the index never holds a record the table lacks
+        record_table
+            .store_rkyv(0, &rtk.bytes(), &record)
+            .await
+            .map_err(VeilidAPIError::internal)?;
+
+        // Then add to the index, collecting whatever the LRU pushes out
+        let mut evicted = Vec::new();
+        let replaced = self.record_index.insert(rtk, record, |k, v| {
+            evicted.push((k, v));
+        });
+        if let Some(old_record) = replaced {
+            // Shouldn't happen but log it
+            log_stor!(warn "new duplicate record in table: {:?}", rtk);
+            self.add_dead_record(rtk, old_record);
+        }
+        self.dead_records.extend(evicted);
+
+        // Update storage space
+        self.total_storage_space = new_total_storage_space;
+
+        Ok(())
+    }
+
+    /// Remove the record at `key` from the index and immediately purge its stored data.
+    ///
+    /// Returns a key-not-found error if there is no such record in the index.
+    pub async fn delete_record(&mut self, key: TypedKey) -> VeilidAPIResult<()> {
+        let rtk = RecordTableKey { key };
+
+        // Take the record out of the index; its stored data is removed by the purge below
+        let Some(record) = self.record_index.remove(&rtk) else {
+            apibail_key_not_found!(key);
+        };
+        self.dead_records.push((rtk, record));
+
+        // Not lazy: wait for any purge in progress rather than skipping
+        self.purge_dead_records(false).await;
+
+        Ok(())
+    }
+
+    /// Run `f` with shared access to the record stored under `key`, if there is one.
+    ///
+    /// On a hit the record is touched with the current aligned timestamp and marked
+    /// as changed. Returns `None` when the index has no record for `key`.
+    pub(super) fn with_record<R, F>(&mut self, key: TypedKey, f: F) -> Option<R>
+    where
+        F: FnOnce(&Record<D>) -> R,
+    {
+        let rtk = RecordTableKey { key };
+        let record = self.record_index.get_mut(&rtk)?;
+
+        // The callback runs before the touch, so it sees the previous timestamp
+        let out = f(record);
+        record.touch(get_aligned_timestamp());
+
+        self.mark_record_changed(rtk);
+        Some(out)
+    }
+
+    /// Run `f` with mutable access to the record stored under `key`, if there is one.
+    ///
+    /// On a hit the record is touched with the current aligned timestamp and marked
+    /// as changed. Returns `None` when the index has no record for `key`.
+    pub(super) fn with_record_mut<R, F>(&mut self, key: TypedKey, f: F) -> Option<R>
+    where
+        F: FnOnce(&mut Record<D>) -> R,
+    {
+        let rtk = RecordTableKey { key };
+        let record = self.record_index.get_mut(&rtk)?;
+
+        // The callback runs before the touch, so it sees the previous timestamp
+        let out = f(record);
+        record.touch(get_aligned_timestamp());
+
+        self.mark_record_changed(rtk);
+        Some(out)
+    }
+
+    // pub fn get_descriptor(&mut self, key: TypedKey) -> Option<SignedValueDescriptor> {
+    //     self.with_record(key, |record| record.descriptor().clone())
+    // }
+
+    /// Look up subkey `subkey` of the record at `key`.
+    ///
+    /// Returns `Ok(None)` when there is no record at `key`. Otherwise returns a
+    /// `SubkeyResult` whose `value` is the stored subkey data (from the subkey cache,
+    /// or loaded from the subkey table and then cached), or `None` if the subkey has
+    /// never been stored. `descriptor` is filled only when `want_descriptor` is true.
+    ///
+    /// Errors if `subkey` is out of range for the record, the store is not
+    /// initialized, or the table load fails.
+    pub async fn get_subkey(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        want_descriptor: bool,
+    ) -> VeilidAPIResult<Option<SubkeyResult>> {
+        // Fetch what we need from the record in the index
+        let record_info = self.with_record(key, |record| {
+            let subkey_count = record.subkey_count();
+            let descriptor = want_descriptor.then(|| record.descriptor().clone());
+            (subkey_count, descriptor)
+        });
+        let Some((subkey_count, opt_descriptor)) = record_info else {
+            // Record not available
+            return Ok(None);
+        };
+
+        // Check if the subkey is in range
+        if subkey as usize >= subkey_count {
+            apibail_invalid_argument!("subkey out of range", "subkey", subkey);
+        }
+
+        // Get subkey table
+        let Some(subkey_table) = self.subkey_table.clone() else {
+            apibail_internal!("record store not initialized");
+        };
+
+        let stk = SubkeyTableKey { key, subkey };
+
+        // Prefer the subkey cache, fall back to the table store
+        let cached = self
+            .subkey_cache
+            .get_mut(&stk)
+            .map(|record_data| record_data.signed_value_data().clone());
+        let value = match cached {
+            Some(out) => Some(out),
+            None => {
+                let loaded = subkey_table
+                    .load_rkyv::<RecordData>(0, &stk.bytes())
+                    .await
+                    .map_err(VeilidAPIError::internal)?;
+                match loaded {
+                    Some(record_data) => {
+                        let out = record_data.signed_value_data().clone();
+
+                        // Add to cache, do nothing with lru out
+                        self.add_to_subkey_cache(stk, record_data);
+
+                        Some(out)
+                    }
+                    // Record was available, but subkey was not found
+                    None => None,
+                }
+            }
+        };
+
+        // The descriptor, if requested, is returned whether or not a value was found
+        Ok(Some(SubkeyResult {
+            value,
+            descriptor: opt_descriptor,
+        }))
+    }
+
+    /// Store `signed_value_data` as subkey `subkey` of the record at `key`.
+    ///
+    /// The write is rejected when the value data is larger than `limits.max_subkey_size`,
+    /// there is no record at `key`, `subkey` is out of range, the record would grow past
+    /// `limits.max_record_total_size`, or the store would grow past
+    /// `limits.max_storage_space_mb` (try-again error).
+    ///
+    /// On success the subkey is written to the subkey table and the subkey cache, the
+    /// record's data size is updated to the new aggregate over all of its subkeys, and
+    /// `total_storage_space` is adjusted by the same amount.
+    pub async fn set_subkey(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        signed_value_data: SignedValueData,
+    ) -> VeilidAPIResult<()> {
+        // Check size limit for data
+        if signed_value_data.value_data().data().len() > self.limits.max_subkey_size {
+            apibail_invalid_argument!(
+                "record subkey too large",
+                "signed_value_data.value_data.data.len",
+                signed_value_data.value_data().data().len()
+            );
+        }
+
+        // Get record from index
+        let Some((subkey_count, total_size, record_data_size)) = self.with_record(key, |record| {
+            (
+                record.subkey_count(),
+                record.total_size(),
+                record.record_data_size(),
+            )
+        }) else {
+            apibail_invalid_argument!("no record at this key", "key", key);
+        };
+
+        // Check if the subkey is in range
+        if subkey as usize >= subkey_count {
+            apibail_invalid_argument!("subkey out of range", "subkey", subkey);
+        }
+
+        // Get subkey table
+        let Some(subkey_table) = self.subkey_table.clone() else {
+            apibail_internal!("record store not initialized");
+        };
+
+        // Size of the subkey data being replaced, zero if this subkey was never written
+        let mut prior_subkey_size = 0usize;
+
+        // If subkey exists in subkey cache, use that
+        let stk = SubkeyTableKey { key, subkey };
+        let stk_bytes = stk.bytes();
+
+        if let Some(record_data) = self.subkey_cache.peek(&stk) {
+            prior_subkey_size = record_data.total_size();
+        } else {
+            // If not in cache, try to pull from table store
+            if let Some(record_data) = subkey_table
+                .load_rkyv::<RecordData>(0, &stk_bytes)
+                .await
+                .map_err(VeilidAPIError::internal)?
+            {
+                prior_subkey_size = record_data.total_size();
+            }
+        }
+
+        // Make new record data
+        let record_data = RecordData::new(signed_value_data);
+        let new_subkey_size = record_data.total_size();
+
+        // New aggregate data size of the whole record: swap the old subkey size for the new one.
+        // Saturating so that accounting left inconsistent by earlier writes cannot underflow.
+        let new_record_data_size =
+            (record_data_size + new_subkey_size).saturating_sub(prior_subkey_size);
+
+        // Check new total record size
+        let new_total_size = (total_size + new_record_data_size).saturating_sub(record_data_size);
+        if new_total_size > self.limits.max_record_total_size {
+            apibail_generic!("dht record too large");
+        }
+
+        // Check new total storage space
+        let new_total_storage_space =
+            (self.total_storage_space + new_record_data_size).saturating_sub(record_data_size);
+        if let Some(max_storage_space_mb) = self.limits.max_storage_space_mb {
+            if new_total_storage_space > (max_storage_space_mb * 1_048_576usize) {
+                apibail_try_again!();
+            }
+        }
+
+        // Write subkey
+        subkey_table
+            .store_rkyv(0, &stk_bytes, &record_data)
+            .await
+            .map_err(VeilidAPIError::internal)?;
+
+        // Write to subkey cache
+        self.add_to_subkey_cache(stk, record_data);
+
+        // Update record with the aggregate size, not just the size of this one subkey
+        self.with_record_mut(key, |record| {
+            record.set_record_data_size(new_record_data_size);
+        })
+        .expect("record should still be here");
+
+        // Keep the store-wide accounting in step with the record, so that the amount
+        // subtracted when the record is purged matches what was added
+        self.total_storage_space = new_total_storage_space;
+
+        Ok(())
+    }
+
+    /// LRU out some records until we reclaim the amount of space requested
+    /// This will force a garbage collection of the space immediately
+    /// If zero is passed in here, a garbage collection will be performed of dead records
+    /// without removing any live records
+    /// If the index runs out of records before `space` is reclaimed, eviction stops
+    /// there and whatever was evicted is still purged
+    pub async fn reclaim_space(&mut self, space: usize) {
+        let mut reclaimed = 0usize;
+        while reclaimed < space {
+            let Some((k, v)) = self.record_index.remove_lru() else {
+                // Nothing left to evict; stop instead of looping forever
+                break;
+            };
+            reclaimed += mem::size_of::<RecordTableKey>();
+            reclaimed += v.total_size();
+            self.add_dead_record(k, v);
+        }
+        self.purge_dead_records(false).await;
+    }
+
+    /// Render an abbreviated, human-readable dump of the store's state:
+    /// the record index, subkey cache statistics, total storage space,
+    /// and the pending dead and changed record keys.
+    pub(super) fn debug_records(&self) -> String {
+        let mut out = String::from("Record Index:\n");
+        for (rik, rec) in &self.record_index {
+            let line = format!(
+                "  {} @ {} len={}\n",
+                rik.key.to_string(),
+                rec.last_touched().as_u64(),
+                rec.record_data_size()
+            );
+            out.push_str(&line);
+        }
+
+        out.push_str(&format!(
+            "Subkey Cache Count: {}\n",
+            self.subkey_cache.len()
+        ));
+        out.push_str(&format!(
+            "Subkey Cache Total Size: {}\n",
+            self.subkey_cache_total_size
+        ));
+        out.push_str(&format!(
+            "Total Storage Space: {}\n",
+            self.total_storage_space
+        ));
+
+        out.push_str(&format!("Dead Records: {}\n", self.dead_records.len()));
+        for (dead_key, _) in &self.dead_records {
+            out.push_str(&format!("  {}\n", dead_key.key.to_string()));
+        }
+
+        out.push_str(&format!(
+            "Changed Records: {}\n",
+            self.changed_records.len()
+        ));
+        for changed_key in &self.changed_records {
+            out.push_str(&format!("  {}\n", changed_key.key.to_string()));
+        }
+
+        out
+    }
+}
@@ -0,0 +1,16 @@
+/// Configuration for the record store
+/// Limits given as `Option` are not enforced when `None`; the `_mb` limits are
+/// multiplied by 1_048_576 to get bytes where they are checked
+#[derive(Debug, Default, Copy, Clone)]
+pub struct RecordStoreLimits {
+    /// Number of subkeys to keep in the memory cache
+    pub subkey_cache_size: usize,
+    /// Maximum size of an individual subkey
+    pub max_subkey_size: usize,
+    /// Maximum total record data size per record
+    pub max_record_total_size: usize,
+    /// Limit on the total number of records in the table store
+    pub max_records: Option<usize>,
+    /// Limit on the amount of subkey cache memory to use before evicting cache items
+    pub max_subkey_cache_memory_mb: Option<usize>,
+    /// Limit on the amount of storage space to use for subkey data and record data
+    pub max_storage_space_mb: Option<usize>,
+}
@@ -0,0 +1,225 @@
+use super::*;
+
+/// The context of the do_set_value operation
+struct DoSetValueContext {
+    /// The latest value of the subkey, may be the value passed in
+    pub value: SignedValueData,
+    /// The consensus count for the value we have received
+    pub value_count: usize,
+    /// The schema parsed from the descriptor, used to validate values returned by nodes
+    pub schema: DHTSchema,
+}
+
+impl StorageManager {
+
+    /// Perform a 'set value' query on the network
+    ///
+    /// Fans a set-value RPC for `key`/`subkey` out over the network through `FanoutCall`,
+    /// using the `set_value_*` DHT configuration for consensus count, fanout and timeout.
+    /// A shared context tracks the newest value seen and how many nodes have agreed on it.
+    ///
+    /// Returns the best value known when the fanout ends, whether by consensus, by timeout,
+    /// or by running out of nodes: either `value` as passed in, or a newer value returned
+    /// by a node that passed schema validation. Returns an error if the schema cannot be
+    /// read from `descriptor` or the fanout itself fails.
+    pub async fn outbound_set_value(
+        &self,
+        rpc_processor: RPCProcessor,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        safety_selection: SafetySelection,
+        value: SignedValueData,
+        descriptor: SignedValueDescriptor,
+    ) -> VeilidAPIResult<SignedValueData> {
+        let routing_table = rpc_processor.routing_table();
+
+        // Get the DHT parameters for 'SetValue'
+        let (key_count, consensus_count, fanout, timeout_us) = {
+            let c = self.unlocked_inner.config.get();
+            (
+                c.network.dht.max_find_node_count as usize,
+                c.network.dht.set_value_count as usize,
+                c.network.dht.set_value_fanout as usize,
+                TimestampDuration::from(ms_to_us(c.network.dht.set_value_timeout_ms)),
+            )
+        };
+
+        // Make do-set-value answer context
+        let schema = descriptor.schema()?;
+        let context = Arc::new(Mutex::new(DoSetValueContext {
+            value,
+            value_count: 0,
+            schema,
+        }));
+
+        // Routine to call to generate fanout
+        let call_routine = |next_node: NodeRef| {
+            let rpc_processor = rpc_processor.clone();
+            let context = context.clone();
+            let descriptor = descriptor.clone();
+            async move {
+
+                let send_descriptor = true; // xxx check if next_node needs the descriptor or not
+
+                // get most recent value to send (the lock is released before the rpc call)
+                let value = {
+                    let ctx = context.lock();
+                    ctx.value.clone()
+                };
+
+                // send across the wire
+                let vres = rpc_processor
+                    .clone()
+                    .rpc_call_set_value(
+                        Destination::direct(next_node).with_safety(safety_selection),
+                        key,
+                        subkey,
+                        value,
+                        descriptor.clone(),
+                        send_descriptor,
+                    )
+                    .await?;
+                let sva = network_result_value_or_log!(vres => {
+                    // Any other failures, just try the next node
+                    return Ok(None);
+                });
+
+                // If the node was close enough to possibly set the value
+                if sva.answer.set {
+                    let mut ctx = context.lock();
+
+                    // Keep the value if we got one and it is newer and it passes schema validation
+                    if let Some(value) = sva.answer.value {
+
+                        // Validate with schema
+                        if !ctx.schema.check_subkey_value_data(
+                            descriptor.owner(),
+                            subkey,
+                            value.value_data(),
+                        ) {
+                            // Validation failed, ignore this value
+                            // Move to the next node
+                            return Ok(None);
+                        }
+
+                        // We have a prior value, ensure this is a newer sequence number
+                        let prior_seq = ctx.value.value_data().seq();
+                        let new_seq = value.value_data().seq();
+                        if new_seq > prior_seq {
+                            // If the sequence number is greater, keep it
+                            ctx.value = value;
+                            // One node has shown us this value so far
+                            ctx.value_count = 1;
+                        } else {
+                            // If the sequence number is older, or an equal sequence number,
+                            // the node should not have returned a value here.
+                            // Skip this node's closer list because it is misbehaving
+                            return Ok(None);
+                        }
+                    }
+                    else
+                    {
+                        // It was set on this node and no newer value was found and returned,
+                        // so increase our consensus count
+                        ctx.value_count += 1;
+                    }
+                }
+
+                // Return peers if we have some
+                Ok(Some(sva.answer.peers))
+            }
+        };
+
+        // Routine to call to check if we're done at each step
+        let check_done = |_closest_nodes: &[NodeRef]| {
+            // If we have reached sufficient consensus, return done
+            let ctx = context.lock();
+            if ctx.value_count >= consensus_count {
+                return Some(());
+            }
+            None
+        };
+
+        // Call the fanout
+        let fanout_call = FanoutCall::new(
+            routing_table.clone(),
+            key,
+            key_count,
+            fanout,
+            timeout_us,
+            call_routine,
+            check_done,
+        );
+
+        match fanout_call.run().await {
+            // If we don't finish in the timeout (too much time passed checking for consensus)
+            TimeoutOr::Timeout | 
+            // If we finished with consensus (enough nodes returning the same value)
+            TimeoutOr::Value(Ok(Some(()))) | 
+            // If we finished without consensus (ran out of nodes before getting consensus)
+            TimeoutOr::Value(Ok(None)) => {
+                // Return the best answer we've got
+                let ctx = context.lock();
+                Ok(ctx.value.clone())
+            }
+            // Failed
+            TimeoutOr::Value(Err(e)) => {
+                // If we finished with an error, return that
+                Err(e.into())
+            }
+        }
+    }
+
+    /// Handle a received 'Set Value' query
+    /// Returns a None if the value passed in was set
+    /// Returns a Some(current value) if the value was older and the current value was kept
+    /// Returns an invalid-message network result if the descriptor is missing or does not
+    /// match the stored one, the schema is invalid, or the value fails schema validation
+    pub async fn inbound_set_value(
+        &self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        value: SignedValueData,
+        descriptor: Option<SignedValueDescriptor>,
+    ) -> VeilidAPIResult<NetworkResult<Option<SignedValueData>>> {
+        let mut inner = self.lock().await?;
+
+        // See if the subkey we are modifying has a last known local value
+        let last_subkey_result = inner.handle_get_local_value(key, subkey, true).await?;
+
+        // Make sure this value would actually be newer
+        let newer_local_value = last_subkey_result
+            .value
+            .as_ref()
+            .filter(|last_value| value.value_data().seq() < last_value.value_data().seq());
+        if let Some(last_value) = newer_local_value {
+            // inbound value is older than the one we have, just return the one we have
+            return Ok(NetworkResult::value(Some(last_value.clone())));
+        }
+
+        // Get the descriptor and schema for the key
+        let actual_descriptor = match (last_subkey_result.descriptor, descriptor) {
+            (Some(last_descriptor), Some(descriptor)) => {
+                // Descriptor must match last one if it is provided
+                if descriptor.cmp_no_sig(&last_descriptor) != cmp::Ordering::Equal {
+                    return Ok(NetworkResult::invalid_message(
+                        "setvalue descriptor does not match last descriptor",
+                    ));
+                }
+                last_descriptor
+            }
+            // Descriptor was not provided always go with last descriptor
+            (Some(last_descriptor), None) => last_descriptor,
+            // Nothing stored yet, so the provided descriptor is the one to use
+            (None, Some(descriptor)) => descriptor,
+            // No descriptor
+            (None, None) => {
+                return Ok(NetworkResult::invalid_message(
+                    "descriptor must be provided",
+                ));
+            }
+        };
+        let Ok(schema) = actual_descriptor.schema() else {
+            return Ok(NetworkResult::invalid_message("invalid schema"));
+        };
+
+        // Validate new value with schema
+        let value_is_valid =
+            schema.check_subkey_value_data(actual_descriptor.owner(), subkey, value.value_data());
+        if !value_is_valid {
+            // Validation failed, ignore this value
+            return Ok(NetworkResult::invalid_message("failed schema validation"));
+        }
+
+        // Do the set and return no new value
+        let set_result = inner
+            .handle_set_remote_value(key, subkey, value, actual_descriptor)
+            .await;
+        match set_result {
+            Ok(()) => {}
+            // Internal errors are our own problem and propagate as errors
+            Err(VeilidAPIError::Internal { message }) => {
+                apibail_internal!(message);
+            }
+            // Anything else is reported back as an invalid message
+            Err(e) => {
+                return Ok(NetworkResult::invalid_message(e));
+            }
+        }
+        Ok(NetworkResult::value(None))
+    }
+}
@@ -0,0 +1,439 @@
+use super::*;
+
+/// Locked structure for storage manager
+///
+/// Created empty by `new()`; `init()` opens the metadata db and both record stores,
+/// and `terminate()` flushes and releases them again.
+pub(super) struct StorageManagerInner {
+    /// Handle to the storage manager's unlocked state; this file reads
+    /// `config`, `table_store` and `crypto` through it
+    unlocked_inner: Arc<StorageManagerUnlockedInner>,
+    /// If we are started up
+    pub initialized: bool,
+    /// Records that have been 'opened' and are not yet closed
+    pub opened_records: HashMap<TypedKey, OpenedRecord>,
+    /// Records that have ever been 'created' or 'opened' by this node, things we care about that we must republish to keep alive
+    pub local_record_store: Option<RecordStore<LocalRecordDetail>>,
+    /// Records that have been pushed to this node for distribution by other nodes, that we make an effort to republish
+    pub remote_record_store: Option<RecordStore<RemoteRecordDetail>>,
+    /// Record subkeys that have not been pushed to the network because they were written to offline
+    pub offline_subkey_writes: HashMap<TypedKey, ValueSubkeyRangeSet>,
+    /// Storage manager metadata that is persistent, including copy of offline subkey writes
+    pub metadata_db: Option<TableDB>,
+    /// RPC processor if it is available
+    pub rpc_processor: Option<RPCProcessor>,
+    /// Background processing task (not part of attachment manager tick tree so it happens when detached too)
+    pub tick_future: Option<SendPinBoxFuture<()>>,
+}
+
+/// Limits applied to the local record store, built from the `network.dht` config section.
+/// Record count and storage space are left unbounded (`None`) for local records.
+fn local_limits_from_config(config: VeilidConfig) -> RecordStoreLimits {
+    let cfg = config.get();
+    let dht = &cfg.network.dht;
+    let cache_memory_mb = dht.local_max_subkey_cache_memory_mb as usize;
+    RecordStoreLimits {
+        subkey_cache_size: dht.local_subkey_cache_size as usize,
+        max_subkey_size: MAX_SUBKEY_SIZE,
+        max_record_total_size: MAX_RECORD_DATA_SIZE,
+        max_records: None,
+        max_subkey_cache_memory_mb: Some(cache_memory_mb),
+        max_storage_space_mb: None,
+    }
+}
+
+/// Limits applied to the remote record store, built from the `network.dht` config section.
+/// Unlike the local store, record count and storage space are both capped by the config.
+fn remote_limits_from_config(config: VeilidConfig) -> RecordStoreLimits {
+    let cfg = config.get();
+    let dht = &cfg.network.dht;
+    let record_cap = dht.remote_max_records as usize;
+    let cache_memory_mb = dht.remote_max_subkey_cache_memory_mb as usize;
+    let storage_space_mb = dht.remote_max_storage_space_mb as usize;
+    RecordStoreLimits {
+        subkey_cache_size: dht.remote_subkey_cache_size as usize,
+        max_subkey_size: MAX_SUBKEY_SIZE,
+        max_record_total_size: MAX_RECORD_DATA_SIZE,
+        max_records: Some(record_cap),
+        max_subkey_cache_memory_mb: Some(cache_memory_mb),
+        max_storage_space_mb: Some(storage_space_mb),
+    }
+}
+
+impl StorageManagerInner {
+    /// Build an uninitialized inner state around the shared unlocked state.
+    /// Nothing is opened here; `init()` must run before the stores are usable.
+    pub fn new(unlocked_inner: Arc<StorageManagerUnlockedInner>) -> Self {
+        Self {
+            unlocked_inner,
+            initialized: false,
+            opened_records: HashMap::new(),
+            local_record_store: None,
+            remote_record_store: None,
+            offline_subkey_writes: HashMap::new(),
+            metadata_db: None,
+            rpc_processor: None,
+            tick_future: None,
+        }
+    }
+
+    /// Open the metadata table and both record stores, restore persisted metadata,
+    /// and start the periodic tick.
+    ///
+    /// `outer_self` is the owning `StorageManager`; a clone of it is made for every
+    /// tick so that `StorageManager::tick()` can be driven from the background task.
+    ///
+    /// # Errors
+    /// Returns an error if the metadata table cannot be opened, if either record store
+    /// fails to initialize, or if loading the metadata fails. On any of these early
+    /// returns `initialized` is not set.
+    pub async fn init(&mut self, outer_self: StorageManager) -> EyreResult<()> {
+
+        // The table name is a constant, so pass the literal rather than allocating via format!
+        let metadata_db = self.unlocked_inner
+            .table_store
+            .open("storage_manager_metadata", 1)
+            .await?;
+
+        let local_limits = local_limits_from_config(self.unlocked_inner.config.clone());
+        let remote_limits = remote_limits_from_config(self.unlocked_inner.config.clone());
+
+        let mut local_record_store = RecordStore::new(
+            self.unlocked_inner.table_store.clone(),
+            "local",
+            local_limits,
+        );
+        local_record_store.init().await?;
+
+        let mut remote_record_store = RecordStore::new(
+            self.unlocked_inner.table_store.clone(),
+            "remote",
+            remote_limits,
+        );
+        remote_record_store.init().await?;
+
+        // Only publish the stores into `self` once all of them initialized successfully
+        self.metadata_db = Some(metadata_db);
+        self.local_record_store = Some(local_record_store);
+        self.remote_record_store = Some(remote_record_store);
+
+        self.load_metadata().await?;
+
+        // Schedule tick
+        // NOTE(review): the interval argument is presumably milliseconds - confirm against `interval`
+        let tick_future = interval(1000, move || {
+            let this = outer_self.clone();
+            async move {
+                // A failed tick is logged and the ticker keeps running
+                if let Err(e) = this.tick().await {
+                    log_stor!(warn "storage manager tick failed: {}", e);
+                }
+            }
+        });
+        self.tick_future = Some(tick_future);
+
+        self.initialized = true;
+
+        Ok(())
+    }
+
+    /// Shut the storage manager state down: stop the ticker, tick both record stores
+    /// one last time, persist the metadata and mark the state uninitialized.
+    /// Failures along the way are logged and do not abort the shutdown.
+    pub async fn terminate(&mut self) {
+        // Stop ticker
+        if let Some(ticker) = self.tick_future.take() {
+            ticker.await;
+        }
+
+        // Final flush on record stores; each store is taken out of `self` first
+        if let Some(mut store) = self.local_record_store.take() {
+            if let Err(e) = store.tick().await {
+                log_stor!(error "termination local record store tick failed: {}", e);
+            }
+        }
+        if let Some(mut store) = self.remote_record_store.take() {
+            if let Err(e) = store.tick().await {
+                log_stor!(error "termination remote record store tick failed: {}", e);
+            }
+        }
+
+        // Save metadata, then let go of the metadata db
+        if self.metadata_db.is_some() {
+            let saved = self.save_metadata().await;
+            if let Err(e) = saved {
+                log_stor!(error "termination metadata save failed: {}", e);
+            }
+            self.metadata_db = None;
+        }
+        self.offline_subkey_writes.clear();
+
+        // Mark not initialized
+        self.initialized = false;
+    }
+
+    /// Persist `offline_subkey_writes` to the metadata db in a single transaction.
+    /// Does nothing (and succeeds) when no metadata db is open.
+    async fn save_metadata(&mut self) -> EyreResult<()> {
+        let Some(db) = &self.metadata_db else {
+            return Ok(());
+        };
+
+        let txn = db.transact();
+        txn.store_rkyv(0, b"offline_subkey_writes", &self.offline_subkey_writes)?;
+        txn.commit().await.wrap_err("failed to commit")?;
+        Ok(())
+    }
+
+    /// Restore `offline_subkey_writes` from the metadata db, if one is open.
+    ///
+    /// A missing entry yields an empty map. If the stored entry cannot be loaded
+    /// (for example because its serialized format changed), the failure is logged,
+    /// the entry is deleted, and an empty map is used instead. Neither the load
+    /// failure nor a failed delete is returned as an error.
+    async fn load_metadata(&mut self) -> EyreResult<()> {
+        if let Some(metadata_db) = &self.metadata_db {
+            self.offline_subkey_writes = match metadata_db.load_rkyv(0, b"offline_subkey_writes").await {
+                Ok(v) => v.unwrap_or_default(),
+                Err(e) => {
+                    // Always report why the load failed, not only when the cleanup fails too
+                    debug!("offline_subkey_writes format changed, clearing: {}", e);
+                    if let Err(e) = metadata_db.delete(0, b"offline_subkey_writes").await {
+                        debug!("failed to clear offline_subkey_writes: {}", e);
+                    }
+                    Default::default()
+                }
+            };
+        }
+        Ok(())
+    }
+
+    /// Create a brand new local record owned by a freshly generated keypair.
+    ///
+    /// The schema is compiled and signed with the new owner key, the resulting record is
+    /// stored in the local record store under its derived DHT key, and that key is
+    /// returned together with the owner keypair.
+    ///
+    /// # Errors
+    /// Fails if `kind` is not a supported cryptosystem, if the local record store is not
+    /// initialized, or if signing, record construction or storing the record fails.
+    pub async fn create_new_owned_local_record(
+        &mut self,
+        kind: CryptoKind,
+        schema: DHTSchema,
+        safety_selection: SafetySelection,
+    ) -> VeilidAPIResult<(TypedKey, KeyPair)> {
+        // Get cryptosystem
+        let Some(vcrypto) = self.unlocked_inner.crypto.get(kind) else {
+            apibail_generic!("unsupported cryptosystem");
+        };
+
+        // Get local record store
+        let Some(store) = self.local_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        // Compile the schema and sign it with a new owner key
+        let schema_data = schema.compile();
+        let owner = vcrypto.generate_keypair();
+        let descriptor = SignedValueDescriptor::make_signature(
+            owner.key,
+            schema_data,
+            vcrypto.clone(),
+            owner.secret,
+        )?;
+
+        // Build the local record and store it under its derived key
+        let record = Record::<LocalRecordDetail>::new(
+            get_aligned_timestamp(),
+            descriptor,
+            LocalRecordDetail { safety_selection },
+        )?;
+        let dht_key = Self::get_key(vcrypto.clone(), &record);
+        store.new_record(dht_key, record).await?;
+
+        Ok((dht_key, owner))
+    }
+
+    /// Open a record that already exists in the local record store.
+    ///
+    /// Returns `Ok(None)` when the local store has no record for `key`. Otherwise the
+    /// record's stored safety selection is replaced with `safety_selection`, the record
+    /// is registered as opened, and its descriptor is returned. The descriptor carries
+    /// the owner secret only when `writer` is the owner keypair.
+    ///
+    /// # Errors
+    /// Fails if the record is already open or the local record store is not initialized.
+    pub fn open_existing_record(
+        &mut self,
+        key: TypedKey,
+        writer: Option<KeyPair>,
+        safety_selection: SafetySelection,
+    ) -> VeilidAPIResult<Option<DHTRecordDescriptor>> {
+        // Ensure the record is closed
+        if self.opened_records.contains_key(&key) {
+            apibail_generic!("record is already open and should be closed first");
+        }
+
+        // Get local record store
+        let Some(store) = self.local_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        // Update the stored safety selection and pull out owner and schema in one pass
+        let found = store.with_record_mut(key, |record: &mut Record<LocalRecordDetail>| {
+            record.detail_mut().safety_selection = safety_selection;
+            (record.owner().clone(), record.schema())
+        });
+        let Some((owner, schema)) = found else {
+            // No local record for this key
+            return Ok(None);
+        };
+
+        // Only a writer that is also the owner gives us the owner secret
+        let owner_secret = match writer {
+            Some(w) if w.key == owner => Some(w.secret),
+            _ => None,
+        };
+
+        // Write open record
+        let opened = OpenedRecord::new(writer, safety_selection);
+        self.opened_records.insert(key, opened);
+
+        // Make DHT Record Descriptor to return
+        Ok(Some(DHTRecordDescriptor::new(
+            key,
+            owner,
+            owner_secret,
+            schema,
+        )))
+    }
+
+    /// Open a record that is not yet in the local store, using the descriptor (and
+    /// optionally the subkey value) carried in `subkey_result`.
+    ///
+    /// A new local record is created from the descriptor; if `subkey_result` holds a
+    /// value it is written to `subkey`. The record is then registered as opened and its
+    /// descriptor is returned, including the owner secret when `writer` is the owner.
+    ///
+    /// # Panics
+    /// Panics if the record is already in `opened_records`.
+    ///
+    /// # Errors
+    /// Fails if `subkey_result` has no descriptor, the schema cannot be parsed, the local
+    /// record store is not initialized, or storing the record or subkey fails.
+    pub async fn open_new_record(
+        &mut self,
+        key: TypedKey,
+        writer: Option<KeyPair>,
+        subkey: ValueSubkey,
+        subkey_result: SubkeyResult,
+        safety_selection: SafetySelection,
+    ) -> VeilidAPIResult<DHTRecordDescriptor> {
+        // Ensure the record is closed
+        if self.opened_records.contains_key(&key) {
+            panic!("new record should never be opened at this point");
+        }
+
+        // Must have descriptor, a new record can't be stored without one
+        let Some(descriptor) = subkey_result.descriptor else {
+            apibail_generic!("no descriptor");
+        };
+        let owner = descriptor.owner().clone();
+
+        // Only a writer that is also the owner gives us the owner secret
+        let owner_secret = match writer {
+            Some(w) if w.key == owner => Some(w.secret),
+            _ => None,
+        };
+        let schema = descriptor.schema()?;
+
+        // Get local record store
+        let Some(store) = self.local_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        // Make and store a new record for this descriptor
+        let detail = LocalRecordDetail { safety_selection };
+        let record = Record::<LocalRecordDetail>::new(get_aligned_timestamp(), descriptor, detail)?;
+        store.new_record(key, record).await?;
+
+        // If we got a subkey with the getvalue, it has already been validated against the schema, so store it
+        if let Some(value) = subkey_result.value {
+            store.set_subkey(key, subkey, value).await?;
+        }
+
+        // Write open record
+        let opened = OpenedRecord::new(writer, safety_selection);
+        self.opened_records.insert(key, opened);
+
+        // Make DHT Record Descriptor to return
+        Ok(DHTRecordDescriptor::new(key, owner, owner_secret, schema))
+    }
+
+    /// Remove `key` from the set of opened records.
+    ///
+    /// # Errors
+    /// Fails with a generic error if the record was not open.
+    pub fn close_record(&mut self, key: TypedKey) -> VeilidAPIResult<()> {
+        if self.opened_records.remove(&key).is_none() {
+            apibail_generic!("record not open");
+        }
+        Ok(())
+    }
+
+    /// Look up a subkey in the local record store.
+    ///
+    /// Returns whatever the store yields for `key`/`subkey`, or a `SubkeyResult` with
+    /// neither value nor descriptor when the store has nothing for them.
+    ///
+    /// # Errors
+    /// Fails if the local record store is not initialized or the lookup itself fails.
+    pub async fn handle_get_local_value(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        want_descriptor: bool,
+    ) -> VeilidAPIResult<SubkeyResult> {
+        let Some(store) = self.local_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        let found = store.get_subkey(key, subkey, want_descriptor).await?;
+        Ok(found.unwrap_or(SubkeyResult {
+            value: None,
+            descriptor: None,
+        }))
+    }
+
+    /// Write a signed subkey value into the local record store.
+    ///
+    /// # Errors
+    /// Fails if the local record store is not initialized or the store rejects the write.
+    pub async fn handle_set_local_value(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        signed_value_data: SignedValueData,
+    ) -> VeilidAPIResult<()> {
+        let Some(store) = self.local_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        // Write subkey to local store
+        store.set_subkey(key, subkey, signed_value_data).await?;
+        Ok(())
+    }
+
+    /// Look up a subkey in the remote record store.
+    ///
+    /// Returns whatever the store yields for `key`/`subkey`, or a `SubkeyResult` with
+    /// neither value nor descriptor when the store has nothing for them.
+    ///
+    /// # Errors
+    /// Fails if the remote record store is not initialized or the lookup itself fails.
+    pub async fn handle_get_remote_value(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        want_descriptor: bool,
+    ) -> VeilidAPIResult<SubkeyResult> {
+        let Some(store) = self.remote_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        let found = store.get_subkey(key, subkey, want_descriptor).await?;
+        Ok(found.unwrap_or(SubkeyResult {
+            value: None,
+            descriptor: None,
+        }))
+    }
+
+    /// Write a signed subkey value into the remote record store, creating the record
+    /// from `signed_value_descriptor` first when the store does not have it yet.
+    ///
+    /// # Errors
+    /// Fails if the remote record store is not initialized, or if creating the record
+    /// or writing the subkey fails.
+    pub async fn handle_set_remote_value(
+        &mut self,
+        key: TypedKey,
+        subkey: ValueSubkey,
+        signed_value_data: SignedValueData,
+        signed_value_descriptor: SignedValueDescriptor,
+    ) -> VeilidAPIResult<()> {
+        let Some(store) = self.remote_record_store.as_mut() else {
+            apibail_not_initialized!();
+        };
+
+        // The no-op callback is only used to probe whether the record exists
+        let already_stored = store.with_record(key, |_| {}).is_some();
+        if !already_stored {
+            // record didn't exist, make it
+            let record = Record::<RemoteRecordDetail>::new(
+                get_aligned_timestamp(),
+                signed_value_descriptor,
+                RemoteRecordDetail {},
+            )?;
+            store.new_record(key, record).await?;
+        }
+
+        // Write subkey to remote store
+        store.set_subkey(key, subkey, signed_value_data).await?;
+        Ok(())
+    }
+
+    /// Derive the DHT key of a record.
+    ///
+    /// The key is the cryptosystem's hash over the concatenation of the cryptosystem
+    /// kind bytes, the owner public key bytes and the compiled schema bytes, tagged
+    /// with the cryptosystem kind.
+    fn get_key<D>(vcrypto: CryptoSystemVersion, record: &Record<D>) -> TypedKey
+    where
+        D: Clone + RkyvArchive + RkyvSerialize<DefaultVeilidRkyvSerializer>,
+        for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
+        <D as RkyvArchive>::Archived: RkyvDeserialize<D, VeilidSharedDeserializeMap>,
+    {
+        let schema_bytes = record.descriptor().schema_data();
+
+        // Preallocate for kind (4 bytes) + owner key + schema
+        let mut preimage = Vec::<u8>::with_capacity(PUBLIC_KEY_LENGTH + 4 + schema_bytes.len());
+        preimage.extend_from_slice(&vcrypto.kind().0);
+        preimage.extend_from_slice(&record.owner().bytes);
+        preimage.extend_from_slice(schema_bytes);
+
+        let digest = vcrypto.generate_hash(&preimage);
+        TypedKey::new(vcrypto.kind(), digest)
+    }
+}
@@ -0,0 +1,21 @@
+use super::*;
+
+impl StorageManager {
+    // Tick the local and then the remote record store, whichever of them exist.
+    // Per the original note this flushes the stores to disk and removes dead records;
+    // that work happens inside the stores' `tick()`, which is not shown here.
+    // The first failing tick aborts the routine and its error is returned.
+    #[instrument(level = "trace", skip(self), err)]
+    pub(crate) async fn flush_record_stores_task_routine(
+        self,
+        stop_token: StopToken,
+        _last_ts: Timestamp,
+        _cur_ts: Timestamp,
+    ) -> EyreResult<()> {
+        let mut inner = self.inner.lock().await;
+
+        if let Some(store) = inner.local_record_store.as_mut() {
+            store.tick().await?;
+        }
+        if let Some(store) = inner.remote_record_store.as_mut() {
+            store.tick().await?;
+        }
+
+        Ok(())
+    }
+}
@@ -0,0 +1,43 @@
+pub mod flush_record_stores;
+
+use super::*;
+
+impl StorageManager {
+    /// Install the routine that the flush record stores task runs on each tick.
+    pub(crate) fn setup_tasks(&self) {
+        // Set flush record stores tick task
+        debug!("starting flush record stores task");
+        {
+            let this = self.clone();
+            self.unlocked_inner
+                .flush_record_stores_task
+                .set_routine(move |s, l, t| {
+                    // Each invocation gets its own clone because the routine takes `self` by value
+                    Box::pin(
+                        this.clone()
+                            .flush_record_stores_task_routine(
+                                s,
+                                Timestamp::new(l),
+                                Timestamp::new(t),
+                            )
+                            .instrument(trace_span!(
+                                parent: None,
+                                "StorageManager flush record stores task routine"
+                            )),
+                    )
+                });
+        }
+    }
+
+    /// Drive the storage manager's background tasks once; errors from the task tick are returned.
+    pub async fn tick(&self) -> EyreResult<()> {
+        // Run the flush record stores task
+        self.unlocked_inner.flush_record_stores_task.tick().await?;
+
+        Ok(())
+    }
+
+    /// Stop the flush record stores task; a failure to stop is logged as a warning, not returned.
+    pub(crate) async fn cancel_tasks(&self) {
+        debug!("stopping flush record stores task");
+        if let Err(e) = self.unlocked_inner.flush_record_stores_task.stop().await {
+            warn!("flush_record_stores_task not stopped: {}", e);
+        }
+    }
+}
@@ -0,0 +1,12 @@
+use super::*;
+
+/// Information required to handle locally opened records
+///
+/// Stored as the `detail` of records kept in the local record store.
+#[derive(
+    Clone, Debug, PartialEq, Eq, Serialize, Deserialize, RkyvArchive, RkyvSerialize, RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct LocalRecordDetail {
+    /// The last 'safety selection' used when creating/opening this record.
+    /// Even when closed, this safety selection applies to re-publication attempts by the system.
+    pub safety_selection: SafetySelection,
+}
@@ -0,0 +1,17 @@
+mod local_record_detail;
+mod opened_record;
+mod record;
+mod record_data;
+mod remote_record_detail;
+mod signed_value_data;
+mod signed_value_descriptor;
+
+use super::*;
+
+pub use local_record_detail::*;
+pub use opened_record::*;
+pub use record::*;
+pub use record_data::*;
+pub use remote_record_detail::*;
+pub use signed_value_data::*;
+pub use signed_value_descriptor::*;
@@ -0,0 +1,31 @@
+use super::*;
+
+/// The state associated with a local record when it is opened
+/// This is not serialized to storage as it is ephemeral for the lifetime of the opened record
+#[derive(Clone, Debug, Default)]
+pub struct OpenedRecord {
+    /// The key pair used to perform writes to subkey on this opened record
+    /// Without this, set_value() will fail regardless of which key or subkey is being written to
+    /// as all writes are signed
+    writer: Option<KeyPair>,
+
+    /// The safety selection in current use
+    safety_selection: SafetySelection,
+}
+
+impl OpenedRecord {
+    /// Create the opened-record state from the optional writer key pair and the
+    /// safety selection the record was opened with
+    pub fn new(writer: Option<KeyPair>, safety_selection: SafetySelection) -> Self {
+        Self {
+            writer,
+            safety_selection,
+        }
+    }
+
+    /// The writer key pair, if one was supplied when the record was opened
+    pub fn writer(&self) -> Option<&KeyPair> {
+        self.writer.as_ref()
+    }
+
+    /// The safety selection in current use, returned by value
+    pub fn safety_selection(&self) -> SafetySelection {
+        self.safety_selection
+    }
+}
@@ -0,0 +1,84 @@
+use super::*;
+
+/// A stored DHT record: its signed descriptor plus bookkeeping, with a store-specific
+/// `detail` payload of type `D`
+#[derive(
+    Clone, Debug, PartialEq, Eq, Serialize, Deserialize, RkyvArchive, RkyvSerialize, RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct Record<D>
+where
+    D: Clone + RkyvArchive + RkyvSerialize<DefaultVeilidRkyvSerializer>,
+    for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
+    <D as RkyvArchive>::Archived: RkyvDeserialize<D, VeilidSharedDeserializeMap>,
+{
+    /// Signed descriptor (owner, schema data, signature) this record was created from
+    descriptor: SignedValueDescriptor,
+    /// Subkey count taken from the descriptor's schema when the record is constructed
+    subkey_count: usize,
+    /// Timestamp given at construction and updated by `touch()`
+    last_touched_ts: Timestamp,
+    /// Size of the record's data as reported through `set_record_data_size()`; starts at 0
+    record_data_size: usize,
+    /// Store-specific detail attached to the record
+    detail: D,
+}
+
+impl<D> Record<D>
+where
+    D: Clone + RkyvArchive + RkyvSerialize<DefaultVeilidRkyvSerializer>,
+    for<'t> <D as RkyvArchive>::Archived: CheckBytes<RkyvDefaultValidator<'t>>,
+    <D as RkyvArchive>::Archived: RkyvDeserialize<D, VeilidSharedDeserializeMap>,
+{
+    /// Build a record from a signed descriptor.
+    ///
+    /// The subkey count is read from the descriptor's schema, the data size starts at
+    /// zero and `cur_ts` becomes the initial last-touched timestamp.
+    ///
+    /// # Errors
+    /// Fails if the descriptor's schema data cannot be parsed.
+    pub fn new(
+        cur_ts: Timestamp,
+        descriptor: SignedValueDescriptor,
+        detail: D,
+    ) -> VeilidAPIResult<Self> {
+        let subkey_count = descriptor.schema()?.subkey_count();
+        let record = Self {
+            descriptor,
+            subkey_count,
+            last_touched_ts: cur_ts,
+            record_data_size: 0,
+            detail,
+        };
+        Ok(record)
+    }
+
+    /// The signed descriptor this record was created from
+    pub fn descriptor(&self) -> &SignedValueDescriptor {
+        &self.descriptor
+    }
+
+    /// The owner public key, taken from the descriptor
+    pub fn owner(&self) -> &PublicKey {
+        self.descriptor.owner()
+    }
+
+    /// Number of subkeys, as determined from the schema at construction
+    pub fn subkey_count(&self) -> usize {
+        self.subkey_count
+    }
+
+    /// Record `cur_ts` as the last time this record was touched
+    pub fn touch(&mut self, cur_ts: Timestamp) {
+        self.last_touched_ts = cur_ts;
+    }
+
+    /// The last-touched timestamp
+    pub fn last_touched(&self) -> Timestamp {
+        self.last_touched_ts
+    }
+
+    /// Overwrite the tracked size of the record's data
+    pub fn set_record_data_size(&mut self, size: usize) {
+        self.record_data_size = size;
+    }
+
+    /// The tracked size of the record's data
+    pub fn record_data_size(&self) -> usize {
+        self.record_data_size
+    }
+
+    /// The parsed schema of this record's descriptor
+    pub fn schema(&self) -> DHTSchema {
+        // unwrap is safe here because descriptor is immutable and set in new()
+        let parsed = self.descriptor.schema();
+        parsed.unwrap()
+    }
+
+    /// Size of this struct plus the descriptor's total size plus the tracked data size
+    pub fn total_size(&self) -> usize {
+        let struct_size = mem::size_of::<Self>();
+        struct_size + self.descriptor.total_size() + self.record_data_size
+    }
+
+    /// Shared access to the store-specific detail
+    pub fn detail(&self) -> &D {
+        &self.detail
+    }
+
+    /// Mutable access to the store-specific detail
+    pub fn detail_mut(&mut self) -> &mut D {
+        &mut self.detail
+    }
+}
@@ -0,0 +1,31 @@
+use super::*;
+
+/// Stored form of a single subkey value: a wrapper around its `SignedValueData`
+#[derive(
+    Clone,
+    Debug,
+    PartialEq,
+    Eq,
+    PartialOrd,
+    Ord,
+    Serialize,
+    Deserialize,
+    RkyvArchive,
+    RkyvSerialize,
+    RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct RecordData {
+    signed_value_data: SignedValueData,
+}
+
+impl RecordData {
+    /// Wrap a signed value
+    pub fn new(signed_value_data: SignedValueData) -> Self {
+        Self { signed_value_data }
+    }
+
+    /// The wrapped signed value
+    pub fn signed_value_data(&self) -> &SignedValueData {
+        &self.signed_value_data
+    }
+
+    /// Size of this struct plus the length of the value's data bytes
+    pub fn total_size(&self) -> usize {
+        let data_len = self.signed_value_data.value_data().data().len();
+        mem::size_of::<Self>() + data_len
+    }
+}
@@ -0,0 +1,7 @@
+use super::*;
+
+/// Detail attached to records in the remote record store; currently carries no fields
+#[derive(
+    Clone, Debug, PartialEq, Eq, Serialize, Deserialize, RkyvArchive, RkyvSerialize, RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct RemoteRecordDetail {}
@@ -0,0 +1,95 @@
+use super::*;
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+///
+
+/// A subkey value together with the signature made over it by its writer
+#[derive(
+    Clone,
+    Debug,
+    PartialOrd,
+    PartialEq,
+    Eq,
+    Ord,
+    Serialize,
+    Deserialize,
+    RkyvArchive,
+    RkyvSerialize,
+    RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct SignedValueData {
+    value_data: ValueData,
+    signature: Signature,
+}
+impl SignedValueData {
+    /// Pair a value with an already computed signature; nothing is verified here
+    pub fn new(value_data: ValueData, signature: Signature) -> Self {
+        Self {
+            value_data,
+            signature,
+        }
+    }
+
+    /// Check the signature against the value's writer key.
+    ///
+    /// The signed bytes are rebuilt from `owner`, `subkey` and the value itself, so the
+    /// check only passes for the owner and subkey the value was signed for.
+    pub fn validate(
+        &self,
+        owner: &PublicKey,
+        subkey: ValueSubkey,
+        vcrypto: CryptoSystemVersion,
+    ) -> VeilidAPIResult<()> {
+        let signed_bytes = Self::make_signature_bytes(&self.value_data, owner, subkey)?;
+        // validate signature
+        vcrypto.verify(&self.value_data.writer(), &signed_bytes, &self.signature)
+    }
+
+    /// Sign `value_data` for the given owner and subkey using the writer's secret key
+    /// and return the signed value
+    pub fn make_signature(
+        value_data: ValueData,
+        owner: &PublicKey,
+        subkey: ValueSubkey,
+        vcrypto: CryptoSystemVersion,
+        writer_secret: SecretKey,
+    ) -> VeilidAPIResult<Self> {
+        let signed_bytes = Self::make_signature_bytes(&value_data, owner, subkey)?;
+
+        // create signature
+        let signature = vcrypto.sign(&value_data.writer(), &writer_secret, &signed_bytes)?;
+        Ok(Self {
+            value_data,
+            signature,
+        })
+    }
+
+    /// The value that was signed
+    pub fn value_data(&self) -> &ValueData {
+        &self.value_data
+    }
+
+    /// Consume the signed value and return the value alone
+    pub fn into_value_data(self) -> ValueData {
+        self.value_data
+    }
+
+    /// The signature over the value
+    pub fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Size of this struct with the inline `ValueData` portion replaced by the value's
+    /// own total size
+    pub fn total_size(&self) -> usize {
+        let overhead = mem::size_of::<Self>() - mem::size_of::<ValueData>();
+        overhead + self.value_data.total_size()
+    }
+
+    /// Build the byte string that gets signed: owner key bytes, then the subkey and the
+    /// sequence number in little-endian, then the value's data bytes
+    fn make_signature_bytes(
+        value_data: &ValueData,
+        owner: &PublicKey,
+        subkey: ValueSubkey,
+    ) -> VeilidAPIResult<Vec<u8>> {
+        let payload = value_data.data();
+        let mut signed_bytes = Vec::with_capacity(PUBLIC_KEY_LENGTH + 4 + 4 + payload.len());
+
+        // Add owner to signature
+        signed_bytes.extend_from_slice(&owner.bytes);
+        // Add subkey to signature
+        signed_bytes.extend_from_slice(&subkey.to_le_bytes());
+        // Add sequence number to signature
+        signed_bytes.extend_from_slice(&value_data.seq().to_le_bytes());
+        // Add data to signature
+        signed_bytes.extend_from_slice(payload);
+
+        Ok(signed_bytes)
+    }
+}
@@ -0,0 +1,81 @@
+use super::*;
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////
+///
+
+/// A record descriptor: the owner key and schema data, signed by the owner
+#[derive(
+    Clone,
+    Debug,
+    PartialOrd,
+    PartialEq,
+    Eq,
+    Ord,
+    Serialize,
+    Deserialize,
+    RkyvArchive,
+    RkyvSerialize,
+    RkyvDeserialize,
+)]
+#[archive_attr(repr(C), derive(CheckBytes))]
+pub struct SignedValueDescriptor {
+    owner: PublicKey,
+    schema_data: Vec<u8>,
+    signature: Signature,
+}
+impl SignedValueDescriptor {
+    /// Assemble a descriptor from its parts; nothing is verified here
+    pub fn new(owner: PublicKey, schema_data: Vec<u8>, signature: Signature) -> Self {
+        Self {
+            owner,
+            schema_data,
+            signature,
+        }
+    }
+
+    /// Verify that the signature over the schema data was made by the owner key
+    pub fn validate(&self, vcrypto: CryptoSystemVersion) -> VeilidAPIResult<()> {
+        // validate signature
+        vcrypto.verify(&self.owner, &self.schema_data, &self.signature)
+    }
+
+    /// The owner public key
+    pub fn owner(&self) -> &PublicKey {
+        &self.owner
+    }
+
+    /// The raw schema bytes
+    pub fn schema_data(&self) -> &[u8] {
+        &self.schema_data
+    }
+
+    /// Parse the schema bytes into a `DHTSchema`; fails if they are not a valid schema
+    pub fn schema(&self) -> VeilidAPIResult<DHTSchema> {
+        DHTSchema::try_from(self.schema_data.as_slice())
+    }
+
+    /// The owner's signature over the schema data
+    pub fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Sign `schema_data` with the owner's key pair and return the signed descriptor
+    pub fn make_signature(
+        owner: PublicKey,
+        schema_data: Vec<u8>,
+        vcrypto: CryptoSystemVersion,
+        owner_secret: SecretKey,
+    ) -> VeilidAPIResult<Self> {
+        // create signature
+        let signature = vcrypto.sign(&owner, &owner_secret, &schema_data)?;
+        Ok(Self::new(owner, schema_data, signature))
+    }
+
+    /// Size of this struct plus the length of the schema bytes
+    pub fn total_size(&self) -> usize {
+        let schema_len = self.schema_data.len();
+        mem::size_of::<Self>() + schema_len
+    }
+
+    /// Order two descriptors by owner and then by schema data, ignoring the signature
+    pub fn cmp_no_sig(&self, other: &Self) -> cmp::Ordering {
+        self.owner
+            .cmp(&other.owner)
+            .then_with(|| self.schema_data.cmp(&other.schema_data))
+    }
+}