Merge branch 'main' of https://github.com/jina-ai/vectordb into fix-tests

Signed-off-by: Joan Fontanals Martinez <joan.martinez@jina.ai>
This commit is contained in:
Joan Fontanals Martinez 2023-06-19 10:40:09 +02:00
commit d30b14e700
5 changed files with 33 additions and 18 deletions

View File

@@ -89,7 +89,9 @@ jobs:
python -m pip install wheel python -m pip install wheel
pip install pytest pip install pytest
pip install . pip install .
pip install -U docarray[hnswlib]>=0.33.0 #pip install -U docarray[hnswlib]>=0.33.0
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
pip install git+https://github.com/docarray/docarray.git@main
- name: Test - name: Test
id: test id: test
run: | run: |
@@ -118,7 +120,9 @@ jobs:
python -m pip install wheel python -m pip install wheel
pip install pytest pip install pytest
pip install . pip install .
pip install -U docarray[hnswlib]>=0.33.0 #pip install -U docarray[hnswlib]>=0.33.0
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
pip install git+https://github.com/docarray/docarray.git@main
- name: Test - name: Test
id: test id: test
run: | run: |

View File

@@ -53,7 +53,7 @@ jobs:
env: env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
JINA_SLACK_WEBHOOK: ${{ secrets.JINA_SLACK_WEBHOOK }}
- if: failure() - if: failure()
run: echo "nothing to release" run: echo "nothing to release"
- name: bumping master version - name: bumping master version

View File

@@ -40,8 +40,7 @@ setup(
python_requires='>=3.7', python_requires='>=3.7',
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'vectordb=vectordb.__main__:serve', 'vectordb=vectordb.__main__:vectordb',
'vectordb=vectordb.__main__:deploy',
], ],
}, },
extras_require={ extras_require={

View File

@@ -7,6 +7,7 @@ import numpy as np
from docarray import DocList, BaseDoc from docarray import DocList, BaseDoc
from docarray.typing import NdArray from docarray.typing import NdArray
from vectordb import HNSWVectorDB from vectordb import HNSWVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc): class MyDoc(BaseDoc):
@@ -26,7 +27,8 @@ def docs_to_index():
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir): def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10] query = docs_to_index[:10]
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port = random_port()
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db: uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -46,7 +48,8 @@ def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdi
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir): def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100] query = docs_to_index[100]
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -63,8 +66,9 @@ def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, p
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir): def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0] query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128)) delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db: uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -91,8 +95,9 @@ def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpd
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir): def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0] query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding) update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}) as db: uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -118,8 +123,9 @@ def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol,
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir): def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100] query = docs_to_index[:100]
port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(docs=docs_to_index) db.index(docs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -132,7 +138,7 @@ def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmp
assert res.text == res.matches[0].text assert res.text == res.matches[0].text
assert res.scores[0] < 0.001 # some precision issues, should be 0.0 assert res.scores[0] < 0.001 # some precision issues, should be 0.0
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, uses_with={'ef': 5000}) as new_db: protocol=protocol, uses_with={'ef': 5000}) as new_db:
time.sleep(2) time.sleep(2)
resp = new_db.search(docs=query) resp = new_db.search(docs=query)

View File

@@ -7,6 +7,7 @@ import numpy as np
from docarray import DocList, BaseDoc from docarray import DocList, BaseDoc
from docarray.typing import NdArray from docarray.typing import NdArray
from vectordb import InMemoryExactNNVectorDB from vectordb import InMemoryExactNNVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc): class MyDoc(BaseDoc):
@@ -26,7 +27,8 @@ def docs_to_index():
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir): def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10] query = docs_to_index[:10]
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -46,7 +48,8 @@ def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpd
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir): def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100] query = docs_to_index[100]
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -63,8 +66,9 @@ def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards,
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir): def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0] query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128)) delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -91,8 +95,9 @@ def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmp
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir): def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0] query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding) update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(inputs=docs_to_index) db.index(inputs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -118,8 +123,9 @@ def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket']) @pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir): def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100] query = docs_to_index[:100]
port = random_port()
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as db: protocol=protocol) as db:
db.index(docs=docs_to_index) db.index(docs=docs_to_index)
if replicas > 1: if replicas > 1:
@@ -132,7 +138,7 @@ def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tm
assert res.text == res.matches[0].text assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1 assert res.scores[0] > 0.99 # some precision issues, should be 1
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol) as new_db: protocol=protocol) as new_db:
time.sleep(2) time.sleep(2)
resp = new_db.search(docs=query) resp = new_db.search(docs=query)