141
141
from bson .son import RE_TYPE
142
142
from bson .timestamp import Timestamp
143
143
from bson .tz_util import utc
144
+ from bson .vector import DTYPES , BinaryVector
144
145
145
146
_RE_OPT_TABLE = {
146
147
"i" : re .I ,
@@ -608,6 +609,26 @@ def _parse_canonical_binary(doc: Any, json_options: JSONOptions) -> Union[Binary
608
609
return _binary_or_uuid (data , int (subtype , 16 ), json_options )
609
610
610
611
612
def _parse_canonical_binary_vector(doc: Any, dummy0: Any) -> BinaryVector:
    """Decode an extended JSON ``$binaryVector`` document into a BinaryVector.

    The value stored under ``"$binaryVector"`` must contain exactly three
    components:

    - ``"base64"``: the vector payload as a base64 string.
    - ``"dtype"``: the name of an attribute of ``DTYPES`` that is a DTYPES
      instance.
    - ``"padding"``: a string of at most two characters. It is parsed as
      hexadecimal because ``_encode_binary_vector`` writes it with ``"%02x"``
      (and the ``$binary`` parser reads its subtype the same way).

    :param doc: The decoded JSON document holding the ``"$binaryVector"`` key.
    :param dummy0: Unused; present so the signature matches the other parsers.
    :return: ``BinaryVector(data, dtype, padding)`` built from the components.
    :raises KeyError: If one of the three components is missing.
    :raises TypeError: If a component has the wrong type, the dtype name does
        not resolve to a DTYPES instance, or extra components are present.
    :raises ValueError: If the padding string is not a valid hex number.
    """
    binary = doc["$binaryVector"]
    b64 = binary["base64"]
    dtype_name = binary["dtype"]
    padding = binary["padding"]
    if not isinstance(b64, str):
        raise TypeError(f"$binaryVector base64 must be a string: {doc}")
    # Resolve the name with a default so that an unknown (or non-string) dtype
    # falls through to the TypeError below instead of escaping as a bare
    # AttributeError from getattr().
    dtype = getattr(DTYPES, dtype_name, None) if isinstance(dtype_name, str) else None
    if not isinstance(dtype, DTYPES):
        raise TypeError(f"$binaryVector dtype must be a member of bson.vector.DTYPES: {doc}")
    if not isinstance(padding, str) or len(padding) > 2:
        raise TypeError(f"$binaryVector padding must be a string at most 2 characters: {doc}")
    if len(binary) != 3:
        raise TypeError(
            f'$binaryVector must include only "base64", "dtype", and "padding" components: {doc}'
        )

    data = base64.b64decode(b64.encode())
    # Base 16 is the inverse of the encoder's "%02x"; for the single-digit
    # values 0-9 this yields the same result as a decimal parse.
    return BinaryVector(data, dtype, int(padding, 16))
630
+
631
+
611
632
def _parse_canonical_datetime (
612
633
doc : Any , json_options : JSONOptions
613
634
) -> Union [datetime .datetime , DatetimeMS ]:
@@ -820,6 +841,7 @@ def _parse_timestamp(doc: Any, dummy0: Any) -> Timestamp:
820
841
"$minKey" : _parse_canonical_minkey ,
821
842
"$maxKey" : _parse_canonical_maxkey ,
822
843
"$binary" : _parse_binary ,
844
+ "$binaryVector" : _parse_canonical_binary_vector ,
823
845
"$code" : _parse_canonical_code ,
824
846
"$uuid" : _parse_legacy_uuid ,
825
847
"$undefined" : lambda _ , _1 : None ,
@@ -841,6 +863,22 @@ def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
841
863
return {"$binary" : {"base64" : base64 .b64encode (data ).decode (), "subType" : "%02x" % subtype }}
842
864
843
865
866
def _encode_binary_vector(obj: Any, json_options: JSONOptions) -> Any:
    """Encode a vector object as a ``$binaryVector`` extended JSON document.

    The payload is ``base64.b64encode(obj)`` decoded to text, the dtype is
    ``obj.dtype.name`` and the padding is ``obj.padding`` formatted as two
    lowercase hex digits.

    In ``JSONMode.LEGACY`` the base64 text is stored directly under
    ``"$binaryVector"`` with ``"dtype"`` and ``"padding"`` as sibling keys;
    in every other mode the three fields are nested in a sub-document under
    ``"$binaryVector"``.
    """
    use_legacy_layout = json_options.json_mode == JSONMode.LEGACY

    payload = base64.b64encode(obj).decode()
    dtype_name = "%s" % obj.dtype.name
    padding_hex = "%02x" % obj.padding

    if use_legacy_layout:
        # Flat layout: the fields sit next to the "$binaryVector" key itself.
        return {
            "$binaryVector": payload,
            "dtype": dtype_name,
            "padding": padding_hex,
        }

    nested = {
        "base64": payload,
        "dtype": dtype_name,
        "padding": padding_hex,
    }
    return {"$binaryVector": nested}
880
+
881
+
844
882
def _encode_datetimems (obj : Any , json_options : JSONOptions ) -> dict :
845
883
if (
846
884
json_options .datetime_representation == DatetimeRepresentation .ISO8601
@@ -992,6 +1030,7 @@ def _encode_maxkey(dummy0: Any, dummy1: Any) -> dict:
992
1030
str : _encode_noop ,
993
1031
type (None ): _encode_noop ,
994
1032
uuid .UUID : _encode_uuid ,
1033
+ BinaryVector : _encode_binary_vector ,
995
1034
Binary : _encode_binary_obj ,
996
1035
Int64 : _encode_int64 ,
997
1036
Code : _encode_code ,
0 commit comments