From 7a1a070dd83f464ef55a52745631f9d86126d04e Mon Sep 17 00:00:00 2001 From: Yoshihiro Sugi Date: Thu, 18 Apr 2024 22:48:06 +0900 Subject: [PATCH] feat: Add tid/record-key string format (#155) * Add tid/record-key string format * Fix tests * Add tests for tids * Update docs and changelogs --- atrium-api/CHANGELOG.md | 9 ++ atrium-api/src/types.rs | 108 +------------ atrium-api/src/types/string.rs | 179 +++++++++++++++++++-- lexicon/atrium-codegen/src/token_stream.rs | 5 +- lexicon/atrium-lex/src/lexicon.rs | 2 + 5 files changed, 180 insertions(+), 123 deletions(-) diff --git a/atrium-api/CHANGELOG.md b/atrium-api/CHANGELOG.md index 4bcd7555..e94fa41e 100644 --- a/atrium-api/CHANGELOG.md +++ b/atrium-api/CHANGELOG.md @@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- `atrium_api::types::string::Tid` +- `atrium_api::types::string::RecordKey` + - moved from `atrium_api::types::RecordKey` + +### Removed +- `atrium_api::types::RecordKey` + - moved to `atrium_api::types::string::RecordKey` + ## [0.20.1](https://github.com/sugyan/atrium/compare/atrium-api-v0.20.0...atrium-api-v0.20.1) - 2024-04-17 ### Added diff --git a/atrium-api/src/types.rs b/atrium-api/src/types.rs index 10ec2c6f..95876b1a 100644 --- a/atrium-api/src/types.rs +++ b/atrium-api/src/types.rs @@ -2,8 +2,7 @@ //! use ipld_core::ipld::Ipld; -use regex::Regex; -use std::{cell::OnceCell, fmt, ops::Deref, str::FromStr}; +use std::fmt; mod cid_link; pub use cid_link::CidLink; @@ -12,6 +11,7 @@ mod integer; pub use integer::*; pub mod string; +use string::RecordKey; /// Trait for a collection of records that can be stored in a repository. /// @@ -53,72 +53,6 @@ pub trait Collection: fmt::Debug { } } -/// A record key (`rkey`) used to name and reference an individual record within the same -/// collection of an atproto repository. -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] -pub struct RecordKey(String); - -impl RecordKey { - /// Returns the record key as a string slice. - pub fn as_str(&self) -> &str { - self.0.as_str() - } -} - -impl FromStr for RecordKey { - type Err = &'static str; - - #[allow( - clippy::borrow_interior_mutable_const, - clippy::declare_interior_mutable_const - )] - fn from_str(s: &str) -> Result { - const RE_RKEY: OnceCell = OnceCell::new(); - - if [".", ".."].contains(&s) { - Err("Disallowed rkey") - } else if !RE_RKEY - .get_or_init(|| Regex::new(r"^[a-zA-Z0-9._~-]{1,512}$").unwrap()) - .is_match(s) - { - Err("Invalid rkey") - } else { - Ok(Self(s.into())) - } - } -} - -impl<'de> serde::Deserialize<'de> for RecordKey { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - use serde::de::Error; - let value = serde::Deserialize::deserialize(deserializer)?; - Self::from_str(value).map_err(D::Error::custom) - } -} - -impl From for String { - fn from(value: RecordKey) -> Self { - value.0 - } -} - -impl AsRef for RecordKey { - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl Deref for RecordKey { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - /// Definitions for Blob types. /// Usually a map with `$type` is used, but deprecated legacy formats are also supported for parsing. /// @@ -180,44 +114,6 @@ mod tests { const CID_LINK_JSON: &str = r#"{"$link":"bafkreibme22gw2h7y2h7tg2fhqotaqjucnbc24deqo72b6mkl2egezxhvy"}"#; - #[test] - fn valid_rkey() { - // From https://atproto.com/specs/record-key#examples - for valid in &["3jui7kd54zh2y", "self", "example.com", "~1.2-3_", "dHJ1ZQ"] { - assert!( - from_str::(&format!("\"{}\"", valid)).is_ok(), - "valid rkey `{}` parsed as invalid", - valid, - ); - } - } - - #[test] - fn invalid_rkey() { - // From https://atproto.com/specs/record-key#examples - for invalid in &[ - "literal:self", - "alpha/beta", - ".", - "..", - "#extra", - "@handle", - "any space", - "any+space", - "number[3]", - "number(3)", - "\"quote\"", - "pre:fix", - "dHJ1ZQ==", - ] { - assert!( - from_str::(&format!("\"{}\"", invalid)).is_err(), - "invalid rkey `{}` parsed as valid", - invalid, - ); - } - } - #[test] fn test_cid_link_serde_json() { let deserialized = diff --git a/atrium-api/src/types/string.rs b/atrium-api/src/types/string.rs index 09bc5135..b2759069 100644 --- a/atrium-api/src/types/string.rs +++ b/atrium-api/src/types/string.rs @@ -413,6 +413,76 @@ impl Serialize for Language { } } +/// A [Timestamp Identifier]. +/// +/// [Timestamp Identifier]: https://atproto.com/specs/record-key#record-key-type-tid +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[serde(transparent)] +pub struct Tid(String); +string_newtype!(Tid); + +impl Tid { + #[allow( + clippy::borrow_interior_mutable_const, + clippy::declare_interior_mutable_const + )] + /// Parses a `TID` from the given string. + pub fn new(tid: String) -> Result { + const RE_TID: OnceCell = OnceCell::new(); + + if tid.len() != 13 { + Err("TID must be 13 characters") + } else if !RE_TID + .get_or_init(|| { + Regex::new(r"^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$").unwrap() + }) + .is_match(&tid) + { + Err("Invalid TID") + } else { + Ok(Self(tid)) + } + } + + /// Returns the TID as a string slice. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +/// A record key (`rkey`) used to name and reference an individual record within the same +/// collection of an atproto repository. +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] +pub struct RecordKey(String); +string_newtype!(RecordKey); + +impl RecordKey { + #[allow( + clippy::borrow_interior_mutable_const, + clippy::declare_interior_mutable_const + )] + /// Parses a `Record Key` from the given string. + pub fn new(s: String) -> Result { + const RE_RKEY: OnceCell = OnceCell::new(); + + if [".", ".."].contains(&s.as_str()) { + Err("Disallowed rkey") + } else if !RE_RKEY + .get_or_init(|| Regex::new(r"^[a-zA-Z0-9.\-_:~]{1,512}$").unwrap()) + .is_match(&s) + { + Err("Invalid rkey") + } else { + Ok(Self(s)) + } + } + + /// Returns the record key as a string slice. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + #[cfg(test)] mod tests { use serde_json::{from_str, to_string}; @@ -422,7 +492,7 @@ mod tests { #[test] fn valid_datetime() { // From https://atproto.com/specs/lexicon#datetime - for valid in &[ + for valid in [ // preferred "1985-04-12T23:20:50.123Z", "1985-04-12T23:20:50.123456Z", @@ -446,7 +516,7 @@ mod tests { #[test] fn invalid_datetime() { // From https://atproto.com/specs/lexicon#datetime - for invalid in &[ + for invalid in [ "1985-04-12", "1985-04-12T23:20Z", "1985-04-12T23:20:5Z", @@ -488,7 +558,7 @@ mod tests { #[test] fn valid_did() { // From https://atproto.com/specs/did#examples - for valid in &[ + for valid in [ "did:plc:z72i7hdynmk6r22z27h6tvur", "did:web:blueskyweb.xyz", "did:method:val:two", @@ -508,7 +578,7 @@ mod tests { #[test] fn invalid_did() { // From https://atproto.com/specs/did#examples - for invalid in &[ + for invalid in [ "did:METHOD:val", "did:m123:val", "DID:method:val", @@ -528,7 +598,7 @@ mod tests { #[test] fn did_method() { // From https://atproto.com/specs/did#examples - for (method, did) in &[ + for (method, did) in [ ("did:plc", "did:plc:z72i7hdynmk6r22z27h6tvur"), ("did:web", "did:web:blueskyweb.xyz"), ("did:method", "did:method:val:two"), @@ -540,14 +610,14 @@ mod tests { "did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N", ), ] { - assert_eq!(Did::new(did.to_string()).unwrap().method(), *method); + assert_eq!(Did::new(did.to_string()).unwrap().method(), method); } } #[test] fn valid_handle() { // From https://atproto.com/specs/handle#identifier-examples - for valid in &[ + for valid in [ "jay.bsky.social", "8.cn", "name.t--t", // not a real TLD, but syntax ok @@ -573,7 +643,7 @@ mod tests { #[test] fn invalid_handle() { // From https://atproto.com/specs/handle#identifier-examples - for invalid in &[ + for invalid in [ "jo@hn.test", "💩.test", "john..test", @@ -595,7 +665,7 @@ mod tests { #[test] fn valid_nsid() { // From https://atproto.com/specs/nsid#examples - for valid in &[ + for valid in [ "com.example.fooBar", "net.users.bob.ping", "a-0.b-1.c", @@ -613,7 +683,7 @@ mod tests { #[test] fn invalid_nsid() { // From https://atproto.com/specs/nsid#examples - for invalid in &["com.exa💩ple.thing", "com.example"] { + for invalid in ["com.exa💩ple.thing", "com.example"] { assert!( from_str::(&format!("\"{}\"", invalid)).is_err(), "invalid NSID `{}` parsed as valid", @@ -625,7 +695,7 @@ mod tests { #[test] fn nsid_parts() { // From https://atproto.com/specs/nsid#examples - for (nsid, domain_authority, name) in &[ + for (nsid, domain_authority, name) in [ ("com.example.fooBar", "com.example", "fooBar"), ("net.users.bob.ping", "net.users.bob", "ping"), ("a-0.b-1.c", "a-0.b-1", "c"), @@ -633,15 +703,15 @@ mod tests { ("cn.8.lex.stuff", "cn.8.lex", "stuff"), ] { let nsid = Nsid::new(nsid.to_string()).unwrap(); - assert_eq!(nsid.domain_authority(), *domain_authority); - assert_eq!(nsid.name(), *name); + assert_eq!(nsid.domain_authority(), domain_authority); + assert_eq!(nsid.name(), name); } } #[test] fn valid_language() { // From https://www.rfc-editor.org/rfc/rfc5646.html#appendix-A - for valid in &[ + for valid in [ // Simple language subtag: "de", // German "fr", // French @@ -700,7 +770,7 @@ mod tests { #[test] fn invalid_language() { // From https://www.rfc-editor.org/rfc/rfc5646.html#appendix-A - for invalid in &[ + for invalid in [ "de-419-DE", // two region tags // use of a single-character subtag in primary position; note that there are a // few grandfathered tags that start with "i-" that are valid @@ -713,4 +783,83 @@ mod tests { ); } } + + #[test] + fn valid_tid() { + for valid in ["3jzfcijpj2z2a", "7777777777777", "3zzzzzzzzzzzz"] { + assert!( + from_str::(&format!("\"{}\"", valid)).is_ok(), + "valid TID `{}` parsed as invalid", + valid, + ); + } + } + + #[test] + fn invalid_tid() { + for invalid in [ + // not base32 + "3jzfcijpj2z21", + "0000000000000", + // too long/short + "3jzfcijpj2z2aa", + "3jzfcijpj2z2", + // old dashes syntax not actually supported (TTTT-TTT-TTTT-CC) + "3jzf-cij-pj2z-2a", + // high bit can't be high + "zzzzzzzzzzzzz", + "kjzfcijpj2z2a", + ] { + assert!( + from_str::(&format!("\"{}\"", invalid)).is_err(), + "invalid TID `{}` parsed as valid", + invalid, + ); + } + } + + #[test] + fn valid_rkey() { + // From https://atproto.com/specs/record-key#examples + for valid in [ + "3jui7kd54zh2y", + "self", + "literal:self", + "example.com", + "~1.2-3_", + "dHJ1ZQ", + "pre:fix", + "_", + ] { + assert!( + from_str::(&format!("\"{}\"", valid)).is_ok(), + "valid rkey `{}` parsed as invalid", + valid, + ); + } + } + + #[test] + fn invalid_rkey() { + // From https://atproto.com/specs/record-key#examples + for invalid in [ + "alpha/beta", + ".", + "..", + "#extra", + "@handle", + "any space", + "any+space", + "number[3]", + "number(3)", + "\"quote\"", + "dHJ1ZQ==", + ] { + assert!( + from_str::(&format!("\"{}\"", invalid)).is_err(), + "invalid rkey `{}` parsed as valid", + invalid, + ); + } + } } diff --git a/lexicon/atrium-codegen/src/token_stream.rs b/lexicon/atrium-codegen/src/token_stream.rs index 172d8f15..a1f1a8e0 100644 --- a/lexicon/atrium-codegen/src/token_stream.rs +++ b/lexicon/atrium-codegen/src/token_stream.rs @@ -547,7 +547,6 @@ fn integer_type(integer: &LexInteger) -> Result<(TokenStream, TokenStream)> { fn string_type(string: &LexString) -> Result<(TokenStream, TokenStream)> { let description = description(&string.description); - // TODO: enum? let typ = match string.format { Some(LexStringFormat::AtIdentifier) => quote!(crate::types::string::AtIdentifier), Some(LexStringFormat::Cid) => quote!(crate::types::string::Cid), @@ -556,7 +555,9 @@ fn string_type(string: &LexString) -> Result<(TokenStream, TokenStream)> { Some(LexStringFormat::Handle) => quote!(crate::types::string::Handle), Some(LexStringFormat::Nsid) => quote!(crate::types::string::Nsid), Some(LexStringFormat::Language) => quote!(crate::types::string::Language), - // TODO: other formats + Some(LexStringFormat::Tid) => quote!(crate::types::string::Tid), + Some(LexStringFormat::RecordKey) => quote!(crate::types::string::RecordKey), + // TODO: other formats (uri, at-uri) _ => quote!(String), }; Ok((description, typ)) diff --git a/lexicon/atrium-lex/src/lexicon.rs b/lexicon/atrium-lex/src/lexicon.rs index 69d6760c..f9e9bff5 100644 --- a/lexicon/atrium-lex/src/lexicon.rs +++ b/lexicon/atrium-lex/src/lexicon.rs @@ -41,6 +41,8 @@ pub enum LexStringFormat { Nsid, Cid, Language, + Tid, + RecordKey, } #[skip_serializing_none] #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]