Merge branch 'main' of https://github.com/jina-ai/vectordb into fix-tests

Signed-off-by: Joan Fontanals Martinez <joan.martinez@jina.ai>
This commit is contained in:
Joan Fontanals Martinez 2023-06-19 10:40:09 +02:00
commit d30b14e700
5 changed files with 33 additions and 18 deletions

View File

@ -89,7 +89,9 @@ jobs:
python -m pip install wheel
pip install pytest
pip install .
pip install -U docarray[hnswlib]>=0.33.0
#pip install -U docarray[hnswlib]>=0.33.0
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
pip install git+https://github.com/docarray/docarray.git@main
- name: Test
id: test
run: |
@ -118,7 +120,9 @@ jobs:
python -m pip install wheel
pip install pytest
pip install .
pip install -U docarray[hnswlib]>=0.33.0
#pip install -U docarray[hnswlib]>=0.33.0
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
pip install git+https://github.com/docarray/docarray.git@main
- name: Test
id: test
run: |

View File

@ -53,7 +53,7 @@ jobs:
env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
JINA_SLACK_WEBHOOK: ${{ secrets.JINA_SLACK_WEBHOOK }}
- if: failure()
run: echo "nothing to release"
- name: bumping master version
@ -61,4 +61,4 @@ jobs:
with:
github_token: ${{ secrets.JINA_DEV_BOT }}
tags: true
branch: main
branch: main

View File

@ -40,8 +40,7 @@ setup(
python_requires='>=3.7',
entry_points={
'console_scripts': [
'vectordb=vectordb.__main__:serve',
'vectordb=vectordb.__main__:deploy',
'vectordb=vectordb.__main__:vectordb',
],
},
extras_require={

View File

@ -7,6 +7,7 @@ import numpy as np
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import HNSWVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc):
@ -26,7 +27,8 @@ def docs_to_index():
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10]
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
port = random_port()
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -46,7 +48,8 @@ def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdi
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100]
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -63,8 +66,9 @@ def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, p
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -91,8 +95,9 @@ def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpd
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -118,8 +123,9 @@ def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol,
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100]
port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(docs=docs_to_index)
if replicas > 1:
@ -132,7 +138,7 @@ def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmp
assert res.text == res.matches[0].text
assert res.scores[0] < 0.001 # some precision issues, should be 0.0
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, uses_with={'ef': 5000}) as new_db:
time.sleep(2)
resp = new_db.search(docs=query)

View File

@ -7,6 +7,7 @@ import numpy as np
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import InMemoryExactNNVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc):
@ -26,7 +27,8 @@ def docs_to_index():
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10]
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -46,7 +48,8 @@ def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpd
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100]
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -63,8 +66,9 @@ def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards,
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -91,8 +95,9 @@ def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmp
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
@ -118,8 +123,9 @@ def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100]
port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db:
db.index(docs=docs_to_index)
if replicas > 1:
@ -132,7 +138,7 @@ def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tm
assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as new_db:
time.sleep(2)
resp = new_db.search(docs=query)