Merge branch 'main' of https://github.com/jina-ai/vectordb into fix-tests
Signed-off-by: Joan Fontanals Martinez <joan.martinez@jina.ai>
This commit is contained in:
commit
d30b14e700
|
@ -89,7 +89,9 @@ jobs:
|
||||||
python -m pip install wheel
|
python -m pip install wheel
|
||||||
pip install pytest
|
pip install pytest
|
||||||
pip install .
|
pip install .
|
||||||
pip install -U docarray[hnswlib]>=0.33.0
|
#pip install -U docarray[hnswlib]>=0.33.0
|
||||||
|
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
|
||||||
|
pip install git+https://github.com/docarray/docarray.git@main
|
||||||
- name: Test
|
- name: Test
|
||||||
id: test
|
id: test
|
||||||
run: |
|
run: |
|
||||||
|
@ -118,7 +120,9 @@ jobs:
|
||||||
python -m pip install wheel
|
python -m pip install wheel
|
||||||
pip install pytest
|
pip install pytest
|
||||||
pip install .
|
pip install .
|
||||||
pip install -U docarray[hnswlib]>=0.33.0
|
#pip install -U docarray[hnswlib]>=0.33.0
|
||||||
|
pip install git+https://github.com/jina-ai/jina.git@fix-handle-list-float
|
||||||
|
pip install git+https://github.com/docarray/docarray.git@main
|
||||||
- name: Test
|
- name: Test
|
||||||
id: test
|
id: test
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -53,7 +53,7 @@ jobs:
|
||||||
env:
|
env:
|
||||||
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
|
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
|
||||||
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
|
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
|
||||||
JINA_SLACK_WEBHOOK: ${{ secrets.JINA_SLACK_WEBHOOK }}
|
|
||||||
- if: failure()
|
- if: failure()
|
||||||
run: echo "nothing to release"
|
run: echo "nothing to release"
|
||||||
- name: bumping master version
|
- name: bumping master version
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -40,8 +40,7 @@ setup(
|
||||||
python_requires='>=3.7',
|
python_requires='>=3.7',
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'vectordb=vectordb.__main__:serve',
|
'vectordb=vectordb.__main__:vectordb',
|
||||||
'vectordb=vectordb.__main__:deploy',
|
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
extras_require={
|
extras_require={
|
||||||
|
|
|
@ -7,6 +7,7 @@ import numpy as np
|
||||||
from docarray import DocList, BaseDoc
|
from docarray import DocList, BaseDoc
|
||||||
from docarray.typing import NdArray
|
from docarray.typing import NdArray
|
||||||
from vectordb import HNSWVectorDB
|
from vectordb import HNSWVectorDB
|
||||||
|
from jina.helper import random_port
|
||||||
|
|
||||||
|
|
||||||
class MyDoc(BaseDoc):
|
class MyDoc(BaseDoc):
|
||||||
|
@ -26,7 +27,8 @@ def docs_to_index():
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[:10]
|
query = docs_to_index[:10]
|
||||||
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
|
port = random_port()
|
||||||
|
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
|
||||||
uses_with={'ef': 5000}) as db:
|
uses_with={'ef': 5000}) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -46,7 +48,8 @@ def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdi
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
|
def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[100]
|
query = docs_to_index[100]
|
||||||
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
port = random_port()
|
||||||
|
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -63,8 +66,9 @@ def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, p
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[0]
|
query = docs_to_index[0]
|
||||||
|
port = random_port()
|
||||||
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
|
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
|
||||||
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
|
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
|
||||||
uses_with={'ef': 5000}) as db:
|
uses_with={'ef': 5000}) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -91,8 +95,9 @@ def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpd
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[0]
|
query = docs_to_index[0]
|
||||||
|
port = random_port()
|
||||||
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
|
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
|
||||||
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
|
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
|
||||||
uses_with={'ef': 5000}) as db:
|
uses_with={'ef': 5000}) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -118,8 +123,9 @@ def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol,
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[:100]
|
query = docs_to_index[:100]
|
||||||
|
port = random_port()
|
||||||
|
|
||||||
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(docs=docs_to_index)
|
db.index(docs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -132,7 +138,7 @@ def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmp
|
||||||
assert res.text == res.matches[0].text
|
assert res.text == res.matches[0].text
|
||||||
assert res.scores[0] < 0.001 # some precision issues, should be 0.0
|
assert res.scores[0] < 0.001 # some precision issues, should be 0.0
|
||||||
|
|
||||||
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol, uses_with={'ef': 5000}) as new_db:
|
protocol=protocol, uses_with={'ef': 5000}) as new_db:
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
resp = new_db.search(docs=query)
|
resp = new_db.search(docs=query)
|
||||||
|
|
|
@ -7,6 +7,7 @@ import numpy as np
|
||||||
from docarray import DocList, BaseDoc
|
from docarray import DocList, BaseDoc
|
||||||
from docarray.typing import NdArray
|
from docarray.typing import NdArray
|
||||||
from vectordb import InMemoryExactNNVectorDB
|
from vectordb import InMemoryExactNNVectorDB
|
||||||
|
from jina.helper import random_port
|
||||||
|
|
||||||
|
|
||||||
class MyDoc(BaseDoc):
|
class MyDoc(BaseDoc):
|
||||||
|
@ -26,7 +27,8 @@ def docs_to_index():
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[:10]
|
query = docs_to_index[:10]
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
port = random_port()
|
||||||
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -46,7 +48,8 @@ def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpd
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
|
def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[100]
|
query = docs_to_index[100]
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
port = random_port()
|
||||||
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -63,8 +66,9 @@ def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards,
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[0]
|
query = docs_to_index[0]
|
||||||
|
port = random_port()
|
||||||
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
|
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -91,8 +95,9 @@ def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmp
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[0]
|
query = docs_to_index[0]
|
||||||
|
port = random_port()
|
||||||
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
|
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(inputs=docs_to_index)
|
db.index(inputs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -118,8 +123,9 @@ def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol
|
||||||
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
|
||||||
def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
|
def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
|
||||||
query = docs_to_index[:100]
|
query = docs_to_index[:100]
|
||||||
|
port = random_port()
|
||||||
|
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as db:
|
protocol=protocol) as db:
|
||||||
db.index(docs=docs_to_index)
|
db.index(docs=docs_to_index)
|
||||||
if replicas > 1:
|
if replicas > 1:
|
||||||
|
@ -132,7 +138,7 @@ def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tm
|
||||||
assert res.text == res.matches[0].text
|
assert res.text == res.matches[0].text
|
||||||
assert res.scores[0] > 0.99 # some precision issues, should be 1
|
assert res.scores[0] > 0.99 # some precision issues, should be 1
|
||||||
|
|
||||||
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
|
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
|
||||||
protocol=protocol) as new_db:
|
protocol=protocol) as new_db:
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
resp = new_db.search(docs=query)
|
resp = new_db.search(docs=query)
|
||||||
|
|
Loading…
Reference in New Issue