import pytest
import logging
import hypothesis.strategies as st
import chromadb.test.property.strategies as strategies
from chromadb.api import ClientAPI
import chromadb.api.types as types
from hypothesis.stateful import (
    Bundle,
    RuleBasedStateMachine,
    rule,
    initialize,
    multiple,
    consumes,
    run_state_machine_as_test,
    MultipleResults,
)
from typing import Any, Dict, Mapping, Optional
import numpy
from chromadb.test.property.strategies import hashing_embedding_function


class CollectionStateMachine(RuleBasedStateMachine):
    """Stateful property test for collection management on a client.

    Each rule performs one client call (create, get, delete, list,
    get_or_create, modify) and checks the outcome against ``self.model``, an
    in-memory mapping of collection name to the metadata the collection is
    expected to carry.
    """

    collections: Bundle[strategies.ExternalCollection]
    # Expected state: collection name -> expected metadata (None when unset).
    _model: Dict[str, Optional[types.CollectionMetadata]]

    collections = Bundle("collections")

    def __init__(self, client: ClientAPI):
        """Store the client under test and start with an empty model."""
        super().__init__()
        self._model = {}
        self.client = client

    @initialize()
    def initialize(self) -> None:
        """Reset the client and clear the model before each run."""
        self.client.reset()
        self._model = {}

    @rule(target=collections, coll=strategies.collections())
    def create_coll(
        self, coll: strategies.ExternalCollection
    ) -> MultipleResults[strategies.ExternalCollection]:
        """Create ``coll`` and add it to the bundle.

        Creation is expected to raise when the name is already in the model
        or when the metadata is an empty dict; in that case nothing is added
        to the bundle.
        """
        # Metadata can either be None or a non-empty dict
        if coll.name in self.model or (
            coll.metadata is not None and len(coll.metadata) == 0
        ):
            with pytest.raises(Exception):
                self.client.create_collection(
                    name=coll.name,
                    metadata=coll.metadata,  # type: ignore[arg-type]
                    embedding_function=coll.embedding_function,
                )
            return multiple()

        c = self.client.create_collection(
            name=coll.name,
            metadata=coll.metadata,  # type: ignore[arg-type]
            embedding_function=coll.embedding_function,
        )
        self.set_model(coll.name, coll.metadata)  # type: ignore[arg-type]

        assert c.name == coll.name
        assert c.metadata == self.model[coll.name]
        return multiple(coll)

    @rule(coll=collections)
    def get_coll(self, coll: strategies.ExternalCollection) -> None:
        """Fetch ``coll`` by name; expect an exception if it is not in the model."""
        if coll.name in self.model:
            c = self.client.get_collection(name=coll.name)
            assert c.name == coll.name
            assert c.metadata == self.model[coll.name]
        else:
            with pytest.raises(Exception):
                self.client.get_collection(name=coll.name)

    @rule(coll=consumes(collections))
    def delete_coll(self, coll: strategies.ExternalCollection) -> None:
        """Delete ``coll`` (consuming it from the bundle) and check it is gone."""
        if coll.name in self.model:
            self.client.delete_collection(name=coll.name)
            self.delete_from_model(coll.name)
        else:
            with pytest.raises(Exception):
                self.client.delete_collection(name=coll.name)

        # Whichever branch ran, the collection must not be retrievable now.
        with pytest.raises(Exception):
            self.client.get_collection(name=coll.name)

    @rule()
    def list_collections(self) -> None:
        """Check that the listed collections match the model's names and count."""
        colls = self.client.list_collections()
        assert len(colls) == len(self.model)
        for collection_name in colls:
            assert collection_name in self.model

    # @rule for list_collections with limit and offset
    @rule(
        limit=st.integers(min_value=1, max_value=5),
        offset=st.integers(min_value=0, max_value=5),
    )
    def list_collections_with_limit_offset(self, limit: int, offset: int) -> None:
        """Check that a paginated listing matches a manual slice of the full listing."""
        colls = self.client.list_collections(limit=limit, offset=offset)
        total_collections = self.client.count_collections()

        # get all collections
        all_colls = self.client.list_collections()
        # manually slice the collections based on the given limit and offset
        man_colls = all_colls[offset : offset + limit]

        # A page holds `limit` entries unless fewer than `limit` remain past
        # `offset`, in which case it holds whatever is left (possibly nothing).
        expected_len = min(limit, max(total_collections - offset, 0))
        assert len(colls) == expected_len
        # assert that our manually sliced collections are the same as the ones
        # returned by the API -- for full pages as well as the tail page
        assert colls == man_colls

    @rule(
        target=collections,
        new_metadata=st.one_of(st.none(), strategies.collection_metadata),
        coll=st.one_of(consumes(collections), strategies.collections()),
    )
    def get_or_create_coll(
        self,
        coll: strategies.ExternalCollection,
        new_metadata: Optional[types.Metadata],
    ) -> MultipleResults[strategies.ExternalCollection]:
        """Call ``get_or_create_collection`` and check the result against the model.

        An empty-dict ``new_metadata`` is expected to raise, and nothing is
        returned to the bundle in that case.
        """
        # Cases for get_or_create

        # Case 0
        # new_metadata is none, coll is an existing collection
        # get_or_create should return the existing collection with existing metadata

        # Case 1
        # new_metadata is none, coll is a new collection
        # get_or_create should create a new collection with the metadata of None

        # Case 2
        # new_metadata is not none, coll is an existing collection
        # get_or_create should return the existing collection with the original metadata

        # Case 3
        # new_metadata is not none, coll is a new collection
        # get_or_create should create a new collection with the new metadata

        if new_metadata is not None and len(new_metadata) == 0:
            with pytest.raises(Exception):
                self.client.get_or_create_collection(
                    name=coll.name,
                    metadata=new_metadata,  # type: ignore[arg-type]
                    embedding_function=coll.embedding_function,
                )
            return multiple()

        # Update model
        if coll.name not in self.model:
            # Handles case 1 and 3
            coll.metadata = new_metadata
            self.set_model(coll.name, coll.metadata)  # type: ignore[arg-type]

        # Update API
        c = self.client.get_or_create_collection(
            name=coll.name,
            metadata=new_metadata,  # type: ignore[arg-type]
            embedding_function=coll.embedding_function,
        )

        # Check that model and API are in sync
        assert c.name == coll.name
        assert c.metadata == self.model[coll.name]
        return multiple(coll)

    @rule(
        target=collections,
        coll=consumes(collections),
        new_metadata=strategies.collection_metadata,
        new_name=st.one_of(st.none(), strategies.collection_name()),
    )
    def modify_coll(
        self,
        coll: strategies.ExternalCollection,
        new_metadata: Optional[types.Metadata],
        new_name: Optional[str],
    ) -> MultipleResults[strategies.ExternalCollection]:
        """Modify the metadata and/or name of ``coll`` and re-check it.

        ``coll`` is consumed from the bundle and only returned to it when the
        modification succeeds. ``None`` for ``new_metadata`` or ``new_name``
        leaves that attribute as the model currently has it.
        """
        if coll.name not in self.model:
            with pytest.raises(Exception):
                self.client.get_collection(name=coll.name)
            return multiple()

        c = self.client.get_collection(name=coll.name)
        _metadata: Optional[Mapping[str, Any]] = self.model[coll.name]
        _name: str = coll.name
        if new_metadata is not None:
            # Can't set metadata to an empty dict
            if len(new_metadata) == 0:
                # NOTE(review): this exercises get_or_create_collection rather
                # than c.modify -- confirm that is the intended call here.
                with pytest.raises(Exception):
                    self.client.get_or_create_collection(
                        name=coll.name,
                        metadata=new_metadata,  # type: ignore[arg-type]
                        embedding_function=coll.embedding_function,
                    )
                return multiple()

            coll.metadata = new_metadata
            _metadata = new_metadata

        if new_name is not None:
            # Renaming onto a different, already-known collection must fail.
            if new_name in self.model and new_name != coll.name:
                with pytest.raises(Exception):
                    c.modify(metadata=new_metadata, name=new_name)  # type: ignore[arg-type]
                return multiple()

            self.delete_from_model(coll.name)
            coll.name = new_name
            _name = new_name

        self.set_model(_name, _metadata)  # type: ignore[arg-type]
        c.modify(metadata=_metadata, name=_name)  # type: ignore[arg-type]
        c = self.client.get_collection(name=coll.name)

        assert c.name == coll.name
        assert c.metadata == self.model[coll.name]
        return multiple(coll)

    def set_model(
        self,
        name: str,
        metadata: Optional[types.CollectionMetadata],
    ) -> None:
        """Record ``metadata`` as the expected metadata for ``name``."""
        model = self.model
        model[name] = metadata

    def delete_from_model(self, name: str) -> None:
        """Remove ``name`` from the model; raises ``KeyError`` if it is absent."""
        model = self.model
        del model[name]

    @property
    def model(self) -> Dict[str, Optional[types.CollectionMetadata]]:
        """The expected name -> metadata mapping."""
        return self._model


def test_collections(caplog: pytest.LogCaptureFixture, client: ClientAPI) -> None:
    """Run the collection state machine against ``client``."""
    caplog.set_level(logging.ERROR)
    run_state_machine_as_test(lambda: CollectionStateMachine(client))  # type: ignore


# Below are tests that have failed in the past. If your test fails, please add
# it to protect against regressions in the test harness itself. If you need
# help doing so, talk to anton.
def test_previously_failing_one(client: ClientAPI) -> None:
    """Replay a recorded sequence of rule calls that failed in the past."""
    state = CollectionStateMachine(client)
    state.initialize()
    # I don't know why the typechecker is red here. This code is correct and is
    # pulled from the logs.
    (v1,) = state.get_or_create_coll(  # type: ignore[misc]
        coll=strategies.ExternalCollection(
            name="jjn2yjLW1zp2T\n",
            metadata=None,
            embedding_function=hashing_embedding_function(dtype=numpy.float32, dim=863),  # type: ignore[arg-type]
        ),
        new_metadata=None,
    )
    (v6,) = state.get_or_create_coll(  # type: ignore[misc]
        coll=strategies.ExternalCollection(
            name="jjn2yjLW1zp2T\n",
            metadata=None,
            embedding_function=hashing_embedding_function(dtype=numpy.float32, dim=863),  # type: ignore[arg-type]
        ),
        new_metadata=None,
    )
    state.modify_coll(
        coll=v1, new_metadata={"7": -1281, "fGe": -0.0, "K5j": "im"}, new_name=None
    )
    state.modify_coll(coll=v6, new_metadata=None, new_name=None)


# https://github.com/chroma-core/chroma/commit/cf476d70f0cebb7c87cb30c7172ba74d6ea175cd#diff-e81868b665d149bb315d86890dea6fc6a9fc9fc9ea3089aa7728142b54f622c5R210
def test_previously_failing_two(client: ClientAPI) -> None:
    """Replay a second recorded sequence of rule calls that failed in the past."""
    state = CollectionStateMachine(client)
    state.initialize()
    (v13,) = state.get_or_create_coll(  # type: ignore[misc]
        coll=strategies.ExternalCollection(
            name="C1030",
            metadata={},
            embedding_function=hashing_embedding_function(dim=2, dtype=numpy.float32),  # type: ignore[arg-type]
        ),
        new_metadata=None,
    )
    (v15,) = state.modify_coll(  # type: ignore[misc]
        coll=v13,
        new_metadata={
            "0": "10",
            "40": "0",
            "p1nviWeL7fO": "qN",
            "7b": "YS",
            "VYWq4LEMWjCo": True,
        },
        new_name="OF5F0MzbQg\n",
    )
    state.get_or_create_coll(
        coll=strategies.ExternalCollection(
            name="VS0QGh",
            metadata={
                "h": 5.681951615025145e-227,
                "A1": 61126,
                "uhUhLEEMfeC_kN": 2147483647,
                "weF": "pSP",
                "B3DSaP": False,
                "6H533K": 1.192092896e-07,
            },
            embedding_function=hashing_embedding_function(  # type: ignore[arg-type]
                dim=1915, dtype=numpy.float32
            ),
        ),
        new_metadata={
            "xVW09xUpDZA": 31734,
            "g": 1.1,
            "n1dUTalF-MY": -1000000.0,
            "y": "G3EtXTZ",
            "ugXZ_hK": 5494,
        },
    )
    v17 = state.modify_coll(  # noqa: F841
        coll=v15, new_metadata={"L35J2S": "K0l026"}, new_name="Ai1\n"
    )
    v18 = state.get_or_create_coll(coll=v13, new_metadata=None)  # noqa: F841
    state.get_or_create_coll(
        coll=strategies.ExternalCollection(
            name="VS0QGh",
            metadata=None,
            embedding_function=hashing_embedding_function(dim=326, dtype=numpy.float16),  # type: ignore[arg-type]
        ),
        new_metadata=None,
    )
Memory