add num_docs (#62)

* add num_docs

add num_docs

* Update test_inmemory_vectordb.py

* Update test_hnswlib_vectordb.py
This commit is contained in:
0x376h 2023-10-08 11:16:26 +08:00 committed by GitHub
parent a8531f61d6
commit 059dc489b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 20 additions and 3 deletions

View File

@ -169,4 +169,11 @@ def test_hnswlib_vectordb_restore(docs_to_index, tmpdir):
assert len(res.matches) == 10
# assert res.id == res.matches[0].id
# assert res.text == res.matches[0].text
# assert res.scores[0] < 0.001 # some precision issues, should be 0
# assert res.scores[0] < 0.001 # some precision issues, should be 0
def test_hnswlib_num_dos(tmpdir):
    """Index 1000 random documents and check the count reported by ``num_docs``."""
    expected_count = 1000
    vector_db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
    toy_docs = DocList[MyDoc](
        [
            MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128))
            for i in range(expected_count)
        ]
    )
    vector_db.index(inputs=toy_docs)
    # num_docs() answers with a mapping; the count lives under 'num_docs'.
    report = vector_db.num_docs()
    assert report['num_docs'] == expected_count

View File

@ -172,3 +172,10 @@ def test_inmemory_vectordb_restore(docs_to_index, tmpdir):
assert res.id == res.matches[0].id
assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1
def test_inmemory_num_dos(tmpdir):
    """Index 1000 random documents and check the count reported by ``num_docs``."""
    expected_count = 1000
    vector_db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
    toy_docs = DocList[MyDoc](
        [
            MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128))
            for i in range(expected_count)
        ]
    )
    vector_db.index(inputs=toy_docs)
    # num_docs() answers with a mapping; the count lives under 'num_docs'.
    report = vector_db.num_docs()
    assert report['num_docs'] == expected_count

View File

@ -227,6 +227,9 @@ class VectorDB(Generic[TSchema]):
ret = asyncio.run(_deploy())
return ret
def num_docs(self, **kwargs):
    """Return whatever the wrapped executor's ``num_docs()`` reports.

    Keyword arguments are accepted but are not passed on to the executor.
    """
    executor = self._executor
    return executor.num_docs()
@pass_kwargs_as_params
@unify_input_output
def index(self, docs: 'DocList[TSchema]', parameters: Optional[Dict] = None, **kwargs):

View File

@ -105,7 +105,7 @@ class HNSWLibIndexer(TypedExecutor):
return self.update(docs, *args, **kwargs)
def num_docs(self, **kwargs):
    """Report how many documents the underlying indexer holds.

    Keyword arguments are accepted and ignored.

    Returns:
        A dict with a single key ``'num_docs'`` whose value is the result
        of ``self._indexer.num_docs()``.
    """
    # The count must come from `_indexer`; an earlier `return` that read
    # `self._index` shadowed this line and made it unreachable.
    return {'num_docs': self._indexer.num_docs()}
def snapshot(self, snapshot_dir):
# TODO: Maybe copy the work_dir to workspace if `handle` is False

View File

@ -71,7 +71,7 @@ class InMemoryExactNNIndexer(TypedExecutor):
return self._index(docs)
def num_docs(self, *args, **kwargs):
    """Report how many documents the underlying indexer holds.

    Positional and keyword arguments are accepted and ignored.

    Returns:
        A dict with a single key ``'num_docs'`` whose value is the result
        of ``self._indexer.num_docs()``.
    """
    # `self._index` is called as a method elsewhere in this class, so it is
    # not the index object; the count is read from `_indexer` instead. An
    # earlier `return` using `self._index` made this line unreachable.
    return {'num_docs': self._indexer.num_docs()}
def snapshot(self, snapshot_dir):
snapshot_file = f'{snapshot_dir}/index.bin'