Compare commits

..

1 Commits

Author SHA1 Message Date
Joan Fontanals Martinez 34c762f7e7 docs: add config in readme 2023-06-16 13:40:48 +02:00
35 changed files with 321 additions and 886 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 188 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 254 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

View File

@ -62,7 +62,7 @@ jobs:
echo "::set-output name=matrix::$(bash scripts/get-all-test-paths.sh unit 1)"
- id: set-matrix-integration
run: |
echo "::set-output name=matrix::$(bash scripts/get-all-test-paths.sh integration 1)"
echo "::set-output name=matrix::$(bash scripts/get-all-test-paths.sh integration_local 1)"
outputs:
matrix-unit: ${{ steps.set-matrix-unit.outputs.matrix }}
matrix-integration: ${{ steps.set-matrix-integration.outputs.matrix }}
@ -88,15 +88,12 @@ jobs:
python -m pip install --upgrade pip
python -m pip install wheel
pip install pytest
pip install pytest-repeat
pip install pytest-timeout
pip install flaky
pip install .
pip install -U docarray[hnswlib]>=0.34.0
pip install -U docarray[hnswlib]>=0.33.0
- name: Test
id: test
run: |
pytest -v -s --force-flaky --min-passes 1 --max-runs 5 ${{ matrix.test-path }}
pytest -v -s ${{ matrix.test-path }}
timeout-minutes: 30
integration-tests:
@ -120,15 +117,12 @@ jobs:
python -m pip install --upgrade pip
python -m pip install wheel
pip install pytest
pip install pytest-repeat
pip install pytest-timeout
pip install flaky
pip install .
pip install -U docarray[hnswlib]>=0.34.0
pip install -U docarray[hnswlib]>=0.33.0
- name: Test
id: test
run: |
pytest -v -s --force-flaky --min-passes 1 --max-runs 5 ${{ matrix.test-path }}
pytest -v -s ${{ matrix.test-path }}
timeout-minutes: 30
# just for blocking the merge until all parallel integration-tests are successful

View File

@ -51,8 +51,8 @@ jobs:
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_DEVBOT_USER }}
password: ${{ secrets.DOCKERHUB_DEVBOT_PWD }}
username: ${{ secrets.DOCKERHUB_JINAVECTORDB_USER }}
password: ${{ secrets.DOCKERHUB_JINAVECTORDB_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v2
with:

View File

@ -54,6 +54,7 @@ jobs:
env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
JINA_SLACK_WEBHOOK: ${{ secrets.JINA_SLACK_WEBHOOK }}
- if: failure()
run: echo "nothing to release"
- name: bumping master version

View File

@ -1,37 +0,0 @@
name: Release CD
on:
push:
tags:
- "v*" # push to version tags trigger the build
#on:
# push:
# branches-ignore:
# - '**' # temporally disable this action
jobs:
create-release:
permissions: write-all
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
ref: 'main'
- uses: actions/setup-python@v2
with:
python-version: 3.7
- run: |
python scripts/get-last-release-note.py
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
with:
tag_name: ${{ github.ref }}
release_name: 💫 Patch ${{ github.ref }}
body_path: 'tmp.md'
draft: false
prerelease: false

View File

@ -1,359 +0,0 @@
<a name=release-note-0-0-1></a>
## Release Note (`0.0.1`)
> Release time: 2023-06-19 11:05:34
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals Martinez, 🙇
### 🏁 Unit Test and CICD
- [[```a271d1a8```](https://github.com/jina-ai/jina/commit/a271d1a84b189174610183b6a49a9fe32b1e2aa5)] __-__ fix release yaml (*Joan Fontanals Martinez*)
<a name=release-note-0-0-2></a>
## Release Note (`0.0.2`)
> Release time: 2023-06-22 13:43:16
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🍹 Other Improvements
- [[```0e5fd825```](https://github.com/jina-ai/jina/commit/0e5fd82589fd8dc9f6256d9b710ad03854353096)] __-__ update versions of Jina and DocArray (#30) (*Joan Fontanals*)
- [[```13f79de3```](https://github.com/jina-ai/jina/commit/13f79de3cd384771ac886d31a7f5737fc2470f11)] __-__ debug test (#29) (*Joan Fontanals*)
- [[```41b6ac95```](https://github.com/jina-ai/jina/commit/41b6ac954c2a1965f90829ff8a3114df94884e5c)] __-__ __version__: the next version will be 0.0.2 (*Jina Dev Bot*)
<a name=release-note-0-0-4></a>
## Release Note (`0.0.4`)
> Release time: 2023-07-10 09:54:52
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```626b534c```](https://github.com/jina-ai/jina/commit/626b534ce363890b0d79fed9ec8ccd233e79112b)] __-__ do not override dry run endpoints schemas (#33) (*Joan Fontanals*)
- [[```d66ef1bd```](https://github.com/jina-ai/jina/commit/d66ef1bd1fbeb33c7fb76a306ec9b6ac81ac6673)] __-__ fix some stuff for deployment (#31) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```39dc5f9e```](https://github.com/jina-ai/jina/commit/39dc5f9e26f1028220e3f5b2d7a87cbfe2674563)] __-__ update vectordb version and requirements (#35) (*Joan Fontanals*)
- [[```ad507a6d```](https://github.com/jina-ai/jina/commit/ad507a6d685dcf7326b425edda5e456aa20097cd)] __-__ Update README.md (#34) (*Joan Fontanals*)
- [[```0c0627ee```](https://github.com/jina-ai/jina/commit/0c0627eed984a39c48fb939fb805928fff58570c)] __-__ some changes (#32) (*Joan Fontanals*)
- [[```73236ff9```](https://github.com/jina-ai/jina/commit/73236ff9cf2fe811f830955a81ab320288962b29)] __-__ __version__: the next version will be 0.0.3 (*Jina Dev Bot*)
<a name=release-note-0-0-5></a>
## Release Note (`0.0.5`)
> Release time: 2023-07-10 10:27:03
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🍹 Other Improvements
- [[```16ceb6d7```](https://github.com/jina-ai/jina/commit/16ceb6d72227aefae47b62e1ef234e6ad98b6483)] __-__ add long description (#36) (*Joan Fontanals*)
- [[```28ea2948```](https://github.com/jina-ai/jina/commit/28ea29485dcc516ed829a251388612cc664bb96a)] __-__ __version__: the next version will be 0.0.5 (*Jina Dev Bot*)
<a name=release-note-0-0-6></a>
## Release Note (`0.0.6`)
> Release time: 2023-07-10 13:22:08
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 📗 Documentation
- [[```cf1b6b29```](https://github.com/jina-ai/jina/commit/cf1b6b29d543af91cd20bf800043eee6ebf4fe53)] __-__ fix README (#37) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```ca47a292```](https://github.com/jina-ai/jina/commit/ca47a292f26a758a181a36a75e3c33a9a578ed5f)] __-__ __version__: the next version will be 0.0.6 (*Jina Dev Bot*)
<a name=release-note-0-0-7></a>
## Release Note (`0.0.7`)
> Release time: 2023-07-10 14:00:14
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 📗 Documentation
- [[```69492e06```](https://github.com/jina-ai/jina/commit/69492e0663a2d79c9500beea77c564050064ce8d)] __-__ fix README (#38) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```07a8480a```](https://github.com/jina-ai/jina/commit/07a8480a2905a8d7293f79c0668818f684bbd65d)] __-__ __version__: the next version will be 0.0.7 (*Jina Dev Bot*)
<a name=release-note-0-0-8></a>
## Release Note (`0.0.8`)
> Release time: 2023-07-11 15:24:25
🙇 We'd like to thank all contributors for this new release! In particular,
Deepankar Mahapatro, Joan Fontanals, Jina Dev Bot, 🙇
### 🏁 Unit Test and CICD
- [[```6ca57600```](https://github.com/jina-ai/jina/commit/6ca576003403bfab6d0222adbb8be7edfe812337)] __-__ revert force-release step (#40) (*Deepankar Mahapatro*)
### 🍹 Other Improvements
- [[```942f1f5d```](https://github.com/jina-ai/jina/commit/942f1f5d43c2433ecfbaa0f3ae66c90d6caa896d)] __-__ add badges to README (#39) (*Joan Fontanals*)
- [[```d012a48c```](https://github.com/jina-ai/jina/commit/d012a48cfeb79f2ffe52bf24f5a8e9f11b637f92)] __-__ __version__: the next version will be 0.0.8 (*Jina Dev Bot*)
<a name=release-note-0-0-9></a>
## Release Note (`0.0.9`)
> Release time: 2023-07-11 15:58:29
🙇 We'd like to thank all contributors for this new release! In particular,
Deepankar Mahapatro, Jina Dev Bot, 🙇
### 🏁 Unit Test and CICD
- [[```d1d24d9b```](https://github.com/jina-ai/jina/commit/d1d24d9bc6596bca2cdd055885829e345318820c)] __-__ add tag.yml (#41) (*Deepankar Mahapatro*)
### 🍹 Other Improvements
- [[```9dc3c63e```](https://github.com/jina-ai/jina/commit/9dc3c63e6e9eb8b79336194f9a52e66a77c0faef)] __-__ __version__: the next version will be 0.0.9 (*Jina Dev Bot*)
<a name=release-note-0-0-10></a>
## Release Note (`0.0.10`)
> Release time: 2023-07-13 20:19:10
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```d63c2959```](https://github.com/jina-ai/jina/commit/d63c295930d16ebfd83acafa65f4eac68cb5ad80)] __-__ fix setup reqs (#42) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```574a8968```](https://github.com/jina-ai/jina/commit/574a8968b5f11f5a2aba88c0bd4cd7bfd45b0427)] __-__ __version__: the next version will be 0.0.10 (*Jina Dev Bot*)
<a name=release-note-0-0-11></a>
## Release Note (`0.0.11`)
> Release time: 2023-07-14 06:51:55
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🍹 Other Improvements
- [[```e298e6ef```](https://github.com/jina-ai/jina/commit/e298e6ef500193d6a4e98760b2137c071514a704)] __-__ add MANIFEST.in (#43) (*Joan Fontanals*)
- [[```790f6341```](https://github.com/jina-ai/jina/commit/790f6341007a744976cbd0bc431ba2d3bb5327e8)] __-__ __version__: the next version will be 0.0.11 (*Jina Dev Bot*)
<a name=release-note-0-0-14></a>
## Release Note (`0.0.14`)
> Release time: 2023-07-14 10:30:51
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Joan Fontanals Martinez, Han Xiao, Deepankar Mahapatro, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```5ef1c964```](https://github.com/jina-ai/jina/commit/5ef1c964953e7aac9f0336317de29fb83b408938)] __-__ fix single quotes (#47) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```14bc575b```](https://github.com/jina-ai/jina/commit/14bc575ba99387b5239e4b607741eb151da16182)] __-__ upgrade version (*Joan Fontanals Martinez*)
- [[```daef4aac```](https://github.com/jina-ai/jina/commit/daef4aac87751ecf191da1ab75dd9938865a5dc5)] __-__ fix readme (*Han Xiao*)
- [[```c24b8d95```](https://github.com/jina-ai/jina/commit/c24b8d95dae0cd43d4b0e61dd0c587d85b2950d8)] __-__ update readme (*Han Xiao*)
- [[```1df0dcc0```](https://github.com/jina-ai/jina/commit/1df0dcc0901480deccf3b580c1e00b1f4bc0dfb6)] __-__ fix message in readme (*Joan Fontanals Martinez*)
- [[```ced29140```](https://github.com/jina-ai/jina/commit/ced291408538c21e5d310cc643c080789b382b4d)] __-__ fix help message serve (#45) (*Joan Fontanals*)
- [[```7efdc669```](https://github.com/jina-ai/jina/commit/7efdc669ac0cef150e6972f757b9525f66aed52e)] __-__ ignore warnings (#44) (*Deepankar Mahapatro*)
- [[```8e1ab38b```](https://github.com/jina-ai/jina/commit/8e1ab38b5017feacac1caa64da5a3fc039eb872a)] __-__ __version__: the next version will be 0.0.12 (*Jina Dev Bot*)
<a name=release-note-0-0-15></a>
## Release Note (`0.0.15`)
> Release time: 2023-07-19 15:17:53
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```497d1ef6```](https://github.com/jina-ai/jina/commit/497d1ef608dba6db73ce96fe1f5f5849a0cb4e0e)] __-__ fix db port for deployment (#48) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```d3d4a43c```](https://github.com/jina-ai/jina/commit/d3d4a43ce382c2526946818ce7f907f8b997bc9d)] __-__ __version__: the next version will be 0.0.15 (*Jina Dev Bot*)
<a name=release-note-0-0-16></a>
## Release Note (`0.0.16`)
> Release time: 2023-07-19 16:33:59
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🍹 Other Improvements
- [[```0357cfea```](https://github.com/jina-ai/jina/commit/0357cfead576b237675f1d1f38360a391deb9a3a)] __-__ fix docarray version in deploy (#49) (*Joan Fontanals*)
- [[```44856718```](https://github.com/jina-ai/jina/commit/44856718e7ce838cfd7e02f15f82c0d90b815694)] __-__ __version__: the next version will be 0.0.16 (*Jina Dev Bot*)
<a name=release-note-0-0-17></a>
## Release Note (`0.0.17`)
> Release time: 2023-07-25 02:37:14
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```a48f341c```](https://github.com/jina-ai/jina/commit/a48f341c8f5fc13ae70144fca9f73ec6de1764d0)] __-__ fix applying serve and deploy on instance (#53) (*Joan Fontanals*)
### 🏁 Unit Test and CICD
- [[```e0c9921c```](https://github.com/jina-ai/jina/commit/e0c9921c3b720d799bf6f99582a6d2eeb5c1ffee)] __-__ optout telemetry in test (#50) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```be1f72da```](https://github.com/jina-ai/jina/commit/be1f72da50c06982ed9abcedcb5db75dc9faf32f)] __-__ __version__: the next version will be 0.0.17 (*Jina Dev Bot*)
<a name=release-note-0-0-18></a>
## Release Note (`0.0.18`)
> Release time: 2023-08-23 07:50:40
🙇 We'd like to thank all contributors for this new release! In particular,
Joan Fontanals, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```a8e48b23```](https://github.com/jina-ai/jina/commit/a8e48b23aa4bb7a000f9099f24711473b31cf24a)] __-__ fix incompatibility with Jina version (#55) (*Joan Fontanals*)
### 🍹 Other Improvements
- [[```4c5f5ebc```](https://github.com/jina-ai/jina/commit/4c5f5ebc909d7308daf9c1813fc8e022f93ad1d6)] __-__ __version__: the next version will be 0.0.18 (*Jina Dev Bot*)
<a name=release-note-0-0-19></a>
## Release Note (`0.0.19`)
> Release time: 2023-10-08 03:17:44
🙇 We'd like to thank all contributors for this new release! In particular,
0x376h, Naymul Islam, Jina Dev Bot, 🙇
### 🍹 Other Improvements
- [[```059dc489```](https://github.com/jina-ai/jina/commit/059dc489b4e2c698ef8e71811eafee2c7a0ae500)] __-__ add num_docs (#62) (*0x376h*)
- [[```35cbf73a```](https://github.com/jina-ai/jina/commit/35cbf73a355e33667e66d9949c8ef58e28a10bd1)] __-__ __version__: the next version will be 0.0.19 (*Jina Dev Bot*)
<a name=release-note-0-0-20></a>
## Release Note (`0.0.20`)
> Release time: 2023-10-23 10:26:48
🙇 We'd like to thank all contributors for this new release! In particular,
0x376h, Jina Dev Bot, 🙇
### 🆕 New Features
- [[```5f8fc998```](https://github.com/jina-ai/jina/commit/5f8fc998c8c6b29553bff2146bc64166072f8b6a)] __-__ dd method queryid to check id exists (#63) (*0x376h*)
### 🍹 Other Improvements
- [[```a4308081```](https://github.com/jina-ai/jina/commit/a43080813c53c62f86f29083fab5f1a104c27388)] __-__ __version__: the next version will be 0.0.20 (*Jina Dev Bot*)
<a name=release-note-0-0-21></a>
## Release Note (`0.0.21`)
> Release time: 2024-03-04 17:10:02
🙇 We'd like to thank all contributors for this new release! In particular,
Gabe Goodhart, Jina Dev Bot, 🙇
### 🐞 Bug fixes
- [[```f0200e58```](https://github.com/jina-ai/jina/commit/f0200e58721549e009dd872c21642d44c7082177)] __-__ handle missing __validators__ (#68) (*Gabe Goodhart*)
### 🍹 Other Improvements
- [[```53cd4a54```](https://github.com/jina-ai/jina/commit/53cd4a54d8d8411170e9b384feed8ede2f75b423)] __-__ __version__: the next version will be 0.0.21 (*Jina Dev Bot*)

View File

@ -8,4 +8,6 @@ COPY . /vectordb/
RUN cd /vectordb && pip install -U pip && pip install .
RUN pip install -U docarray[hnswlib]>=0.33
ENTRYPOINT ["vectordb"]

View File

@ -1,6 +0,0 @@
recursive-include vectordb/ *
global-exclude __pycache__/*
include LICENSE
include requirements.txt
prune test/
prune **/tests/

377
README.md
View File

@ -1,306 +1,261 @@
<p align="center">
<a href="https://docs.jina.ai"><img src="https://github.com/jina-ai/vectordb/blob/main/.github%2Fimages%2Fvectordb-logo.png?raw=true" alt="VectorDB from Jina AI logo" width="300px"></a>
</p>
# Vector Database for Python Developers
<p align="center">
<b>A Python vector database you just need - no more, no less.</b>
</p>
Vector Databases are databases that store embeddings representing data to provide semantic similarity between objects. Vector databases
are used to perform similarity search between multimodal data, such as text, image, audio or videos and also are powering LLM applications
to provide context for LLMs to improve the results of the generation and prevent hallucinations.
<p align=center>
<a href="https://pypi.org/project/vectordb/"><img alt="PyPI" src="https://img.shields.io/pypi/v/vectordb?label=Release&style=flat-square"></a>
<a href="https://discord.jina.ai"><img src="https://img.shields.io/discord/1106542220112302130?logo=discord&logoColor=white&style=flat-square"></a>
<a href="https://pypistats.org/packages/vectordb"><img alt="PyPI - Downloads from official pypistats" src="https://img.shields.io/pypi/dm/vectordb?style=flat-square"></a>
<a href="https://github.com/jina-ai/vectordb/actions/workflows/cd.yml"><img alt="Github CD status" src="https://github.com/jina-ai/vectordb/actions/workflows/cd.yml/badge.svg"></a>
</p>
`vectordb` is a simple, user-friendly solution for Python developers looking to create their own vector database with CRUD support. Vector databases are a key component of the stack needed to use LLMs as they allow them to have access to context and memory. Many of the solutions out there require developers and users to use complex solutions that are often not needed. With `vectordb`, you can easily create your own vector database solution that can work locally and still be easily deployed and served with scalability features such as sharding and replication.
`vectordb` is a Pythonic vector database offers a comprehensive suite of [CRUD](#crud-support) (Create, Read, Update, Delete) operations and robust [scalability options, including sharding and replication](#scaling-your-db). It's readily deployable in a variety of environments, from [local](#getting-started-with-vectordb-locally) to [on-premise](#getting-started-with-vectordb-as-a-service) and [cloud](#hosting-vectordb-on-jina-ai-cloud). `vectordb` delivers exactly what you need - no more, no less. It's a testament to effective Pythonic design without over-engineering, making it a lean yet powerful solution for all your needs.
Start with your solution as a local library and seamlessly transition into a served database with all the needed capability. No extra complexity than the needed one.
`vectordb` is based on the local libraries wrapped inside [DocArray](https://github.com/docarray/docarray) and the scalability, reliability and serving capabilities of [Jina](https://github.com/jina-ai/jina).
`vectordb` capitalizes on the powerful retrieval prowess of [DocArray](https://github.com/docarray/docarray) and the scalability, reliability, and serving capabilities of [Jina](https://github.com/jina-ai/jina). Here's the magic: DocArray serves as the engine driving vector search logic, while Jina guarantees efficient and scalable index serving. This synergy culminates in a robust, yet user-friendly vector database experience - that's `vectordb` for you.
<!--In simple terms, one can think as [DocArray](https://github.com/docarray/docarray) being a the `Lucene` algorithmic logic for Vector Search powering the retrieval capabilities and [Jina](https://github.com/jina-ai/jina), the ElasticSearch making sure that the indexes are served and scaled for the clients, `vectordb` wraps these technologies to give a powerful and easy to use experience to
use and develop vector databases.-->
In simple terms, one can think of [DocArray](https://github.com/docarray/docarray) as the `Lucene` algorithmic logic for Vector Search powering the retrieval capabilities, and [Jina](https://github.com/jina-ai/jina) as the ElasticSearch making sure that the indexes are served and scaled for the clients; `vectordb` wraps these technologies to give a powerful and easy-to-use experience for
using and developing vector databases.
<!--(THIS CAN BE SHOWN WHEN CUSTOMIZATION IS ENABLED) `vectordb` allows you to start simple and work locally while allowing when needed to deploy and scale in a seamless manner. With the help of [DocArray](https://github.com/docarray/docarray) and [Jina](https://github.com/jina-ai/jina) `vectordb` allows developers to focus on the algorithmic part and tweak the core of the vector search with Python as they want while keeping it easy to scale and deploy the solution. -->
<!--(THIS CAN BE SHOWN WHEN CUSTOMIZATION IS ENABLED) Stop wondering what exact algorithms do existing solutions apply, how do they apply filtering or how to map your schema to their solutions, with `vectordb` you as a Python developer can easily understand and control what is the vector search algorithm doing, giving you the full control if needed while supporting you for local setting and in more advanced and demanding scenarios in the cloud. -->
## Install
## :muscle: Features
```bash
pip install vectordb
```
- User-friendly interface: `vectordb` is designed with simplicity and ease of use in mind, making it accessible even for beginners.
<table>
<tr>
<td>
<a href="#getting-started-with-vectordb-locally">
<img src="https://github.com/jina-ai/vectordb/blob/main/.github%2Fimages%2Fguide-1.png?raw=true" alt="Use vectordb from Jina AI locally" width="100%">
</a>
</td>
<td>
<a href="#getting-started-with-vectordb-as-a-service">
<img src="https://github.com/jina-ai/vectordb/blob/main/.github%2Fimages%2Fguide-2.png?raw=true" alt="Use vectordb from Jina AI as a service" width="100%">
</a>
</td>
<td>
<a href="#hosting-vectordb-on-jina-ai-cloud">
<img src="https://github.com/jina-ai/vectordb/blob/main/.github%2Fimages%2Fguide-3.png?raw=true" alt="Use vectordb from Jina AI on Jina AI Cloud" width="100%">
</a>
</td>
</tr>
</table>
- Adapts to your needs: `vectordb` is designed to offer what you need without extra complexity, supporting the features needed at every step. From local, to serve, to the cloud in a seamless way.
- CRUD support: `vectordb` supports CRUD operations: index, search, update and delete.
- Serve: Serve the databases to insert or search as a service with `gRPC` or `HTTP` protocol.
## Getting started with `vectordb` locally
- Scalable: With `vectordb`, you can deploy your database in the cloud and take advantage of powerful scalability features like sharding and replication. With this, you can easily improve the latency of your service by sharding your data, or improve the availability and throughput by allowing `vectordb` to offer replication.
1. Kick things off by defining a Document schema with the [DocArray](https://docs.docarray.org/user_guide/representing/first_step/) dataclass syntax:
- Deploy to the cloud: If you need to deploy your service in the cloud, you can easily deploy in [Jina AI Cloud](). More deployment options will soon come.
- Serverless capacity: `vectordb` can be deployed in the cloud in serverless mode, allowing you to save resources and have the data available only when needed.
- Multiple ANN algorithms: `vectordb` contains different implementations of ANN algorithms. These are the ones offered so far, we plan to integrate more:
- InMemoryExactNNVectorDB (Exact NN Search): Implements Simple Nearest Neighbour Algorithm.
- HNSWVectorDB (based on HNSW): Based on [HNSWLib](https://github.com/nmslib/hnswlib)
<!--(THIS CAN BE SHOWN WHEN FILTER IS ENABLED)- Filter capacity: `vectordb` allows you to have filters on top of the ANN search. -->
<!--(THIS CAN BE SHOWN WHEN FILTER IS ENABLED)- Customizable: `vectordb` can be easily extended to suit your specific needs or schemas, so you can build the database you want and for any input and output schema you want with the help of [DocArray](https://github.com/docarray/docarray).-->
## 🏁 Getting Started
To get started with Vector Database, simply follow these easy steps, in this example we are going to use `InMemoryExactNNVectorDB` as example:
1. Install `vectordb`:
```pip install vectordb```
2. Define your Index Document schema using [DocArray](https://docs.docarray.org/user_guide/representing/first_step/):
```python
from docarray import BaseDoc
from docarray.typing import NdArray
class ToyDoc(BaseDoc):
text: str = ''
embedding: NdArray[128]
class MyTextDoc(TextDoc):
text: str = ''
embedding: NdArray[768]
```
2. Opt for a pre-built database (like `InMemoryExactNNVectorDB` or `HNSWVectorDB`), and apply the schema:
Make sure that the schema has a field `schema` as a `tensor` type with shape annotation as in the example.
3. Use any of the pre-built databases with the document schema (InMemoryExactNNVectorDB or HNSWVectorDB):
```python
from docarray import DocList
import numpy as np
from vectordb import InMemoryExactNNVectorDB, HNSWVectorDB
db = InMemoryExactNNVectorDB[MyTextDoc](workspace='./workspace_path')
# Specify your workspace path
db = InMemoryExactNNVectorDB[ToyDoc](workspace='./workspace_path')
# Index a list of documents with random embeddings
doc_list = [ToyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[ToyDoc](doc_list))
# Perform a search query
query = ToyDoc(text='query', embedding=np.random.rand(128))
results = db.search(inputs=DocList[ToyDoc]([query]), limit=10)
# Print out the matches
for m in results[0].matches:
print(m)
db.index(inputs=DocList[MyTextDoc]([MyTextDoc(text=f'index {i}', embedding=np.random.rand(128)) for i in range(1000)]))
results = db.search(inputs=DocList[MyTextDoc]([MyTextDoc(text='query', embedding=np.random.rand(128)]), limit=10)
```
Since we issued a single query, `results` contains only one element. The nearest neighbour search results are conveniently stored in the `.matches` attribute.
Each result will contain the matches under the `.matches` attribute as a `DocList[MyTextDoc]`
## Getting started with `vectordb` as a service
`vectordb` is designed to be easily served as a service, supporting `gRPC`, `HTTP`, and `Websocket` communication protocols.
### Server Side
On the server side, you would start the service as follows:
4. Serve the database as a service with any of these protocols: `gRPC`, `HTTP` and `Websocket`.
```python
with db.serve(protocol='grpc', port=12345, replicas=1, shards=1) as service:
with InMemoryExactNNVectorDB[MyTextDoc].serve(workspace='./hnwslib_path', protocol='grpc', port=12345, replicas=1, shards=1) as service:
service.index(inputs=DocList[TextDoc]([TextDoc(text=f'index {i}', embedding=np.random.rand(128)) for i in range(1000)]))
service.block()
```
This command starts `vectordb` as a service on port `12345`, using the `gRPC` protocol with `1` replica and `1` shard.
### Client Side
On the client side, you can access the service with the following commands:
5. Interact with the database through a client in a similar way as previously:
```python
from vectordb import Client
# Instantiate a client connected to the server. In practice, replace 0.0.0.0 to the server IP address.
client = Client[ToyDoc](address='grpc://0.0.0.0:12345')
# Perform a search query
results = client.search(inputs=DocList[ToyDoc]([query]), limit=10)
c = Client[MyTextDoc](address='grpc://0.0.0.0:12345')
results = c.search(inputs=DocList[TextDoc]([TextDoc(text='query', embedding=np.random.rand(128)]), limit=10)
```
This allows you to perform a search query, receiving the results directly from the remote `vectordb` service.
## CRUD API:
When using `vectordb` as a library or accessing it from a client to a served instance, the Python objects share the exact same API
to provide `index`, `search`, `update` and `delete` capability:
- `index`: Index gets as input the `DocList` to index.
- `search`: Search gets as input the `DocList` of batched queries or a single `BaseDoc` as single query. It returns a single or multiple results where each query has `matches` and `scores` attributes sorted by `relevance`.
- `delete`: Delete gets as input the `DocList` of documents to delete from the index. The `delete` operation will only care for the `id` attribute, so you need to keep track of the `indexed` `IDs` if you want to delete documents.
- `update`: Update gets as input the `DocList` of documents to update in the index. The `update` operation will update the indexed document with the same index with the attributes and payload from the input documents.
## :rocket: Serve and scale your own Database, add replication and sharding
### Serving:
In order to serve your `vectordb` so that it can be accessed from a Client, you can provide the following parameters:
- protocol: The protocol to be used for serving, it can be `gRPC`, `HTTP`, `websocket` or any combination of them provided as a list. Defaults to `gRPC`
- port: The port where the service will be accessible, it can be a list of one port for each protocol provided. Default to 8081
- workspace: The workspace is the path used by the VectorDB to hold and persist required data. Defaults to '.' (current directory)
## Hosting `vectordb` on Jina AI Cloud
### Scalability
You can seamlessly deploy your `vectordb` instance to Jina AI Cloud, which ensures access to your database from any location.
When serving or deploying your Vector Databases you can set 2 scaling parameters and `vectordb`:
Start by embedding your database instance or class into a Python file:
- Shards: The number of shards in which the data will be split. This will allow for better latency. `vectordb` will make sure that Documents are indexed in only one of the shards, while search request will be sent to all the shards and `vectordb` will make sure to merge the results from all shards.
- Replicas: The number of replicas of the same DB that must exist. The given replication factor will be shared by all the `shards`. `vectordb` uses [RAFT](https://raft.github.io/) algorithm to ensure that the index is in sync between all the replicas of each shard. With this, `vectordb` increases the availability of the service and allows for better search throughput as multiple replicas can respond in parallel to more search requests while allowing CRUD operations.
** When deployed to JCloud, the number of replicas will be set to 1. We are working to enable replication in the cloud
## 💻 `vectordb` CLI
`vectordb` is a simple CLI that helps you to serve and deploy your `vectordb` db.
First, you need to embed your database instance or class in a python file.
```python
# example.py
from docarray import BaseDoc
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import InMemoryExactNNVectorDB
db = InMemoryExactNNVectorDB[ToyDoc](workspace='./vectordb') # notice how `db` is the instance that we want to serve
class MyDoc(BaseDoc):
text: str
embedding: NdArray[128]
db = InMemoryExactNNVectorDB[MyDoc](workspace='./vectordb') # notice how `db` is the instance that we want to serve
if __name__ == '__main__':
# IMPORTANT: make sure to protect this part of the code using __main__ guard
with db.serve() as service:
# make sure to protect this part of the code
with app.serve() as service:
service.block()
```
Next, follow these steps to deploy your instance:
1. If you haven't already, sign up for a [Jina AI Cloud](https://cloud.jina.ai/) account.
| Description | Command |
| --- | ---: |
| Serve your app locally | `vectordb serve --db example:db` |
| Deploy your app on JCloud |`vectordb deploy --db example:db` |
2. Use the `jc` command line to login to your Jina AI Cloud account:
```bash
jc login
```
## :cloud: Deploy it to the cloud
3. Deploy your instance:
`vectordb` allows you to deploy your solution to the cloud easily.
```bash
vectordb deploy --db example:db
```
1. First, you need to get a [Jina AI Cloud](https://cloud.jina.ai/) account
![](./.github/images/vectordb_deploy_screenshot.png)
2. Login to your Jina AI Cloud account using the `jc` command line:
### Connect from the client
```jc login```
After deployment, use the `vectordb` Client to access the assigned endpoint:
3. Deploy:
```bash
vectordb deploy --db example:db
```
<details>
<summary>Show command output</summary>
```text
╭──────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ App ID │ <id>
├──────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ Phase │ Serving │
├──────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ Endpoint │ grpc://<id>.wolf.jina.ai │
├──────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ App logs │ dashboards.wolf.jina.ai │
╰──────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
</details>
4. Connect from Client
Once deployed, you can use `vectordb` Client to access the given endpoint.
```python
from vectordb import Client
# replace the ID with the ID of your deployed DB as shown in the screenshot above
c = Client(address='grpcs://ID.wolf.jina.ai')
c = Client(address='grpc://<id>.wolf.jina.ai')
```
### Manage your deployed instances using [jcloud](https://github.com/jina-ai/jcloud)
5. Manage your deployed instances using [jcloud](https://github.com/jina-ai/jcloud):
You can then list and delete your deployed DBs with `jc` command:
You can then list, pause, resume or delete your deployed DBs with `jc` command:
```jc list <>```
```jcloud list ID```
![](./.github/images/vectordb_deploy_list.png)
```jcloud pause ID``` or ```jcloud resume ID```
![](./.github/images/vectordb_deploy_paused.png)
```jcloud remove ID```
```jc delete <>```
## ⚙️ Configure
## Advanced Topics
Here you can find the list of parameters you can use to configure the behavior for each of the `VectorDB` types.
### What is a vector database?
### InMemoryExactNNVectorDB
Vector databases serve as sophisticated repositories for embeddings, capturing the essence of semantic similarity among disparate objects. These databases facilitate similarity searches across a myriad of multimodal data types, paving the way for a new era of information retrieval. By providing contextual understanding and enriching generation results, vector databases greatly enhance the performance and utility of Large Language Models (LLMs). This underscores their pivotal role in the evolution of data science and machine learning applications.
This database type does an exhaustive search on the embeddings and therefore has a very limited configuration setting:
### CRUD support
Both the local library usage and the client-server interactions in `vectordb` share the same API. This provides `index`, `search`, `update`, and `delete` functionalities:
- `index`: Accepts a `DocList` to index.
- `search`: Takes a `DocList` of batched queries or a single `BaseDoc` as a single query. It returns either single or multiple results, each with `matches` and `scores` attributes sorted by `relevance`.
- `delete`: Accepts a `DocList` of documents to remove from the index. Only the `id` attribute is necessary, so make sure to track the `indexed` `IDs` if you need to delete documents.
- `update`: Accepts a `DocList` of documents to update in the index. The `update` operation will replace the `indexed` document with the same index with the attributes and payload from the input documents.
### Service endpoint configuration
You can serve `vectordb` and access it from a client with the following parameters:
- protocol: The serving protocol. It can be `gRPC`, `HTTP`, `websocket` or a combination of them, provided as a list. Default is `gRPC`.
- port: The service access port. Can be a list of ports for each provided protocol. Default is 8081.
- workspace: The path where the VectorDB persists required data. Default is '.' (current directory).
### Scaling your DB
You can set two scaling parameters when serving or deploying your Vector Databases with `vectordb`:
- Shards: The number of data shards. This improves latency, as `vectordb` ensures Documents are indexed in only one of the shards. Search requests are sent to all shards and results are merged.
- Replicas: The number of DB replicas. `vectordb` uses the [RAFT](https://raft.github.io/) algorithm to sync the index between replicas of each shard. This increases service availability and search throughput, as multiple replicas can respond in parallel to more search requests while allowing CRUD operations. Note: In JCloud deployments, the number of replicas is set to 1. We're working on enabling replication in the cloud.
### Vector search configuration
Here are the parameters for each `VectorDB` type:
#### InMemoryExactNNVectorDB
This database performs exhaustive search on embeddings and has limited configuration settings:
- `workspace`: The folder where required data is persisted.
- workspace: The folder where the required data will be persisted.
```python
InMemoryExactNNVectorDB[MyDoc](workspace='./vectordb')
InMemoryExactNNVectorDB[MyDoc].serve(workspace='./vectordb')
```
#### HNSWVectorDB
### HNSWVectorDB
This database employs the HNSW (Hierarchical Navigable Small World) algorithm from [HNSWLib](https://github.com/nmslib/hnswlib) for Approximate Nearest Neighbor search. It provides several configuration options:
This database implements Approximate Nearest Neighbour based on HNSW algorithm using [HNSWLib](https://github.com/nmslib/hnswlib).
- `workspace`: Specifies the directory where required data is stored and persisted.
It contains more configuration options:
Additionally, HNSWVectorDB offers a set of configurations that allow tuning the performance and accuracy of the Nearest Neighbor search algorithm. Detailed descriptions of these configurations can be found in the [HNSWLib README](https://github.com/nmslib/hnswlib):
- workspace: The folder where the required data will be persisted.
Then a set of configurations that tweak the performance and accuracy of the NN search algorithm. You can find more details in [HNSWLib README](https://github.com/nmslib/hnswlib)
- `space`: Specifies the similarity metric used for the space (options are "l2", "ip", or "cosine"). The default is "l2".
- `max_elements`: Sets the initial capacity of the index, which can be increased dynamically. The default is 1024.
- `ef_construction`: This parameter controls the speed/accuracy trade-off during index construction. The default is 200.
- `ef`: This parameter controls the query time/accuracy trade-off. The default is 10.
- `M`: This parameter defines the maximum number of outgoing connections in the graph. The default is 16.
- `allow_replace_deleted`: If set to `True`, this allows replacement of deleted elements with newly added ones. The default is `False`.
- `num_threads`: This sets the default number of threads to be used during `index` and `search` operations. The default is 1.
- space: name of the space, related to the similarity metric used (can be one of "l2", "ip", or "cosine"), default: "l2"
- max_elements: Initial capacity of the index, which is increased dynamically, default: 1024,
- ef_construction: parameter that controls speed/accuracy trade-off during the index construction, default: 200,
- ef: parameter controlling query time/accuracy trade-off, default: 10,
- M: parameter that defines the maximum number of outgoing connections in the graph, default: 16.
- allow_replace_deleted: enables replacing of deleted elements with new added ones, default: False
- num_threads: default number of threads to use while `index` and `search` are used, default: 1
## 🛣️ Roadmap
We have big plans for the future of Vector Database! Here are some of the features we have in the works:
### Command line interface
- Further configuration of ANN algorithms.
- More ANN search algorithms: We want to support more ANN search algorithms.
- Filter capacity: We want to support filtering for our offered ANN Search solutions.
- Customizable: We want to make it easy for users to customize the behavior for their specific needs in an easy way for Python developers.
`vectordb` includes a simple CLI for serving and deploying your database:
- Serverless capacity: We're working on adding serverless capacity to `vectordb` in the cloud. We currently allow scaling between 0 and 1 replica; we aim to offer from 0 to N.
- More deploying options: We want to enable deploying `vectordb` on different clouds with more options
| Description | Command |
|---------------------------------| ---: |
| Serve your DB locally | `vectordb serve --db example:db` |
| Deploy your DB on Jina AI Cloud |`vectordb deploy --db example:db` |
## Features
- **User-friendly Interface:** With `vectordb`, simplicity is key. Its intuitive interface is designed to accommodate users across varying levels of expertise.
- **Minimalistic Design:** `vectordb` packs all the essentials, with no unnecessary complexity. It ensures a seamless transition from local to server and cloud deployment.
- **Full CRUD Support:** From indexing and searching to updating and deleting, `vectordb` covers the entire spectrum of CRUD operations.
- **DB as a Service:** Harness the power of gRPC, HTTP, and Websocket protocols with `vectordb`. It enables you to serve your databases and conduct insertion or searching operations efficiently.
- **Scalability:** Experience the raw power of `vectordb`'s deployment capabilities, including robust scalability features like sharding and replication. Improve your service latency with sharding, while replication enhances availability and throughput.
- **Cloud Deployment:** Deploying your service in the cloud is a breeze with [Jina AI Cloud](https://cloud.jina.ai/). More deployment options are coming soon!
- **Serverless Capability:** `vectordb` can be deployed in a serverless mode in the cloud, ensuring optimal resource utilization and data availability as per your needs.
- **Multiple ANN Algorithms:** `vectordb` offers diverse implementations of Approximate Nearest Neighbors (ANN) algorithms. Here are the current offerings, with more integrations on the horizon:
- InMemoryExactNNVectorDB (Exact NN Search): Implements Simple Nearest Neighbor Algorithm.
- HNSWVectorDB (based on HNSW): Utilizes [HNSWLib](https://github.com/nmslib/hnswlib)
<!--(THIS CAN BE SHOWN WHEN FILTER IS ENABLED)- Filter capacity: `vectordb` allows you to have filters on top of the ANN search. -->
<!--(THIS CAN BE SHOWN WHEN FILTER IS ENABLED)- Customizable: `vectordb` can be easily extended to suit your specific needs or schemas, so you can build the database you want and for any input and output schema you want with the help of [DocArray](https://github.com/docarray/docarray).-->
## Roadmap
The future of Vector Database looks bright, and we have ambitious plans! Here's a sneak peek into the features we're currently developing:
- More ANN Search Algorithms: Our goal is to support an even wider range of ANN search algorithms.
- Enhanced Filtering Capabilities: We're working on enhancing our ANN Search solutions to support advanced filtering.
- Customizability: We aim to make `vectordb` highly customizable, allowing Python developers to tailor its behavior to their specific needs with ease.
- Expanding Serverless Capacity: We're striving to enhance the serverless capacity of `vectordb` in the cloud. While we currently support scaling between 0 and 1 replica, our goal is to extend this to 0 to N replicas.
- Expanded Deployment Options: We're actively working on facilitating the deployment of `vectordb` across various cloud platforms, with a broad range of options.
Need help with `vectordb`? Interested in using it but require certain features to meet your unique needs? Don't hesitate to reach out to us. Join our [Discord community](https://discord.jina.ai) to chat with us and other community members.
If you need any help with `vectordb`, or you are interested in using it and have requests to make it fit your own needs, don't hesitate to reach out to us. You can join our [Slack community](https://jina.ai/slack) and chat with us and other community members.
## Contributing
The VectorDB project is backed by [Jina AI](https://jina.ai) and licensed under Apache-2.0. Contributions from the community are greatly appreciated! If you have an idea for a new feature or an improvement, we would love to hear from you. We're always looking for ways to make `vectordb` more user-friendly and effective.
We welcome contributions from the community! If you have an idea for a new feature or improvement, please let us know. We're always looking for ways to make `vectordb` better for our users.

52
aux.py Normal file
View File

@ -0,0 +1,52 @@
from docarray import DocList, BaseDoc
from docarray.documents import TextDoc
from docarray.typing import NdArray
from vectordb import HNSWVectorDB
from vectordb import InMemoryExactNNVectorDB
import numpy as np


class MyDoc(BaseDoc):
    """Schema for the demo documents: a text payload plus a 128-dim embedding."""

    # free-form text stored alongside the vector
    text: str
    # NdArray[128] enforces a fixed embedding dimensionality of 128
    embedding: NdArray[128]


# 200 toy documents with identical text and random embeddings for the demos below.
docs = [MyDoc(text='hey', embedding=np.random.rand(128)) for i in range(200)]

# --- Demo 1: use InMemoryExactNNVectorDB as a plain library (no server). ---
# Library mode uses the `docs=` keyword for both index and search.
indexer = InMemoryExactNNVectorDB[MyDoc]()
indexer.index(docs=DocList[MyDoc](docs))
# Query with the first three indexed docs; each result carries `matches` and `scores`.
resp = indexer.search(docs=DocList[MyDoc](docs[0:3]))
print(f' resp {resp}')
for query in resp:
    print(f' query matches {query.matches}')
    print(f' query matches scores {query.scores}')

# --- Demo 2: the same DB served as a service. ---
# Service mode uses the `inputs=` keyword instead of `docs=`.
service = InMemoryExactNNVectorDB[MyDoc].serve()
with service:
    service.index(inputs=DocList[MyDoc](docs))
    resp = service.search(inputs=DocList[MyDoc](docs[0:3]))
    print(f' resp {resp}')
    for query in resp:
        print(f' query matches {query.matches}')
        print(f' query matches scores {query.scores}')

# --- Demo 3: library-mode indexing/search again. ---
# NOTE(review): the HNSW indexer construction is commented out, so this block
# re-uses the in-memory indexer from Demo 1 — presumably it was meant to
# exercise HNSWVectorDB in library mode; confirm intent.
# indexer = HNSWVectorDB[MyDoc]()
indexer.index(docs=DocList[MyDoc](docs))
resp = indexer.search(docs=DocList[MyDoc](docs[0:3]))
print(f' resp {resp}')
for query in resp:
    print(f' query matches {query.matches}')
    print(f' query matches scores {query.scores}')

# --- Demo 4: HNSWVectorDB served as a service. ---
service = HNSWVectorDB[MyDoc].serve()
with service:
    service.index(inputs=DocList[MyDoc](docs))
    resp = service.search(inputs=DocList[MyDoc](docs[0:3]))
    print(f' resp {resp}')
    for query in resp:
        print(f' query matches {query.matches}')
        print(f' query matches scores {query.scores}')

from vectordb.utils.create_doc_type import create_output_doc_type

# Build and print the auto-generated output document type for MyDoc.
o = create_output_doc_type(MyDoc)
print(f' {o}')

View File

@ -1,23 +0,0 @@
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import InMemoryExactNNVectorDB, HNSWVectorDB
import random
import string
import numpy as np


class MyDoc(BaseDoc):
    """Document schema: random text plus a 128-dim embedding."""

    # short random text payload
    text: str
    # fixed 128-dim embedding vector
    embedding: NdArray[128]


# NOTE(review): this instance is never used below — the service is created from
# HNSWVectorDB instead; confirm which backend this example is meant to showcase.
db = InMemoryExactNNVectorDB[MyDoc](workspace='./workspace')

# The __main__ guard matters here: serving may spawn worker processes that
# re-import this module, so top-level serve calls must be protected.
if __name__ == '__main__':
    # 2000 docs with random 5-letter lowercase text and random embeddings.
    docs_to_index = DocList[MyDoc](
        [MyDoc(text="".join(random.choice(string.ascii_lowercase) for _ in range(5)), embedding=np.random.rand(128))
         for _ in range(2000)])
    # Re-use a slice of the indexed docs as queries.
    query = docs_to_index[100:200]
    # Serve locally over gRPC with a single shard and replica, then index and
    # search within the service context; `limit=3` caps matches per query.
    with HNSWVectorDB[MyDoc].serve(workspace='./workspace', replicas=1, shards=1,
                                   protocol='grpc') as service:
        service.index(inputs=docs_to_index)
        resp = service.search(inputs=query, limit=3)

View File

@ -1,2 +1,3 @@
jina>=3.20.0
docarray[hnswlib]>=0.34.0
jina>=3.17.0
click
#docarray[hnswlib]>=0.33.0

View File

@ -7,13 +7,16 @@ DEFAULT_BATCH_SIZE=5
BATCH_SIZE="${2:-$DEFAULT_BATCH_SIZE}"
declare -a unit_tests=($(find tests/unit -name "test_*.py"))
declare -a integration_tests=($(find tests/integration -name "test_*.py"))
declare -a integration_tests_local=($(find tests/integration/local -name "test_*.py"))
declare -a integration_tests_jcloud=($(find tests/integration/jcloud -name "test_*.py"))
declare -a all_tests=("${unit_tests[@]}" "${integration_tests[@]}")
if [ "$TEST_SUITE" == "unit" ]; then
dest="$(echo "${unit_tests[@]}" | xargs -n$BATCH_SIZE)"
elif [[ "$TEST_SUITE" == "integration" ]]; then
dest="$(echo "${integration_tests[@]}" | xargs -n$BATCH_SIZE)"
elif [[ "$TEST_SUITE" == "integration_local" ]]; then
dest="$(echo "${integration_tests_local[@]}" | xargs -n$BATCH_SIZE)"
elif [[ "$TEST_SUITE" == "integration_jcloud" ]]; then
dest="$(echo "${integration_tests_jcloud[@]}" | xargs -n$BATCH_SIZE)"
else
dest="$(echo "${all_tests[@]}" | xargs -n$BATCH_SIZE)"
fi

View File

@ -1,15 +1,10 @@
from setuptools import setup, find_packages
from os import path
AUTHOR = 'Jina AI'
AUTHOR_EMAIL = 'hello@jina.ai'
LICENSE = 'Apache 2.0'
GITHUB_URL = 'https://github.com/jina-ai/vectordb/'
DOWNLOAD_URL = 'https://github.com/jina-ai/vectordb/tags'
try:
pkg_name = 'vectordb'
libinfo_py = path.join(pkg_name, '__init__.py')
with open(libinfo_py, 'r', encoding='utf-8') as f:
libinfo_content = f.readlines()
libinfo_content = open(libinfo_py, 'r', encoding='utf-8').readlines()
version_line = [l.strip() for l in libinfo_content if l.startswith('__version__')][
0
]
@ -17,28 +12,19 @@ try:
except FileNotFoundError:
__version__ = '0.0.0'
try:
with open('README.md', encoding='utf-8') as fp:
_long_description = fp.read()
except FileNotFoundError:
_long_description = ''
# Read the contents of requirements.txt
with open(path.join(path.dirname(__file__), 'requirements.txt'), 'r') as f:
with open('requirements.txt', 'r') as f:
requirements = f.read().splitlines()
setup(
name='vectordb',
version=__version__,
description='The Python VectorDB. Build your vector database from working as a library to scaling as a database in the cloud',
long_description=_long_description,
long_description_content_type='text/markdown',
author= AUTHOR,
author_email=AUTHOR_EMAIL,
license=LICENSE,
url=GITHUB_URL,
download_url=DOWNLOAD_URL,
author='Jina AI',
author_email='hello@jina.ai',
license='Apache 2.0',
url='https://github.com/jina-ai/vectordb/',
download_url='https://github.com/jina-ai/vectordb/tags',
packages=find_packages(),
classifiers=[
'Development Status :: 5 - Production/Stable',
@ -54,17 +40,17 @@ setup(
python_requires='>=3.7',
entry_points={
'console_scripts': [
'vectordb=vectordb.__main__:vectordb',
'vectordb=vectordb.__main__:deploy',
],
},
extras_require={
'test': [
'pytest',
'pytest-asyncio',
'pytest-repeat',
'flaky',
'pytest-timeout'
],
},
install_requires=requirements,
)
import subprocess
subprocess.run(['pip', 'install', 'docarray[hnswlib]>=0.33.0'])

View File

@ -1,11 +1,5 @@
import pytest
@pytest.fixture(autouse=True)
def test_log_level(monkeypatch):
monkeypatch.setenv('JINA_LOG_LEVEL', 'DEBUG')
@pytest.fixture(autouse=True)
def test_disable_telemetry(monkeypatch):
monkeypatch.setenv('JINA_OPTOUT_TELEMETRY', 'True')

View File

View File

@ -1,4 +1,3 @@
import multiprocessing
import pytest
import random
import time
@ -8,7 +7,6 @@ import numpy as np
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import HNSWVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc):
@ -23,15 +21,13 @@ def docs_to_index():
for _ in range(2000)])
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10]
port = random_port()
with HNSWVectorDB[MyDoc](workspace=str(tmpdir)).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}, timeout_ready=10000) as db:
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -44,16 +40,14 @@ def test_hnswlib_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdi
assert res.scores[0] < 0.001 # some precision issues, should be 0.0
@pytest.mark.timeout(270)
@pytest.mark.parametrize('limit', [1, 10, 2000, 2500])
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100]
port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -64,16 +58,14 @@ def test_hnswlib_vectordb_single_query(docs_to_index, limit, replicas, shards, p
assert resp.scores[0] < 0.001 # some precision issues, should be 0.0
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with HNSWVectorDB[MyDoc](workspace=str(tmpdir)).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}, timeout_ready=10000) as db:
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -94,16 +86,14 @@ def test_hnswlib_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpd
assert resp.text != resp.matches[0].text
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with HNSWVectorDB[MyDoc]().serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol, port=port,
uses_with={'ef': 5000}, timeout_ready=10000) as db:
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards, protocol=protocol,
uses_with={'ef': 5000}) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -123,16 +113,14 @@ def test_hnswlib_vectordb_udpate_text(docs_to_index, replicas, shards, protocol,
assert resp.matches[0].text == resp.text + '_changed'
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100]
port = random_port()
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with HNSWVectorDB[MyDoc](ef=5000).serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(docs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -144,8 +132,8 @@ def test_hnswlib_vectordb_restore(docs_to_index, replicas, shards, protocol, tmp
assert res.text == res.matches[0].text
assert res.scores[0] < 0.001 # some precision issues, should be 0.0
with HNSWVectorDB[MyDoc]().serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, uses_with={'ef': 5000}, timeout_ready=10000) as new_db:
with HNSWVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol, uses_with={'ef': 5000}) as new_db:
time.sleep(2)
resp = new_db.search(docs=query)
assert len(resp) == len(query)

View File

@ -1,4 +1,3 @@
import multiprocessing
import pytest
import random
import string
@ -8,7 +7,6 @@ import numpy as np
from docarray import DocList, BaseDoc
from docarray.typing import NdArray
from vectordb import InMemoryExactNNVectorDB
from jina.helper import random_port
class MyDoc(BaseDoc):
@ -23,15 +21,13 @@ def docs_to_index():
for _ in range(2000)])
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:10]
port = random_port()
with InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir)).serve(replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -44,16 +40,14 @@ def test_inmemory_vectordb_batch(docs_to_index, replicas, shards, protocol, tmpd
assert res.scores[0] > 0.99 # some precision issues, should be 1.0
@pytest.mark.timeout(270)
@pytest.mark.parametrize('limit', [1, 10, 2000, 2500])
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards, protocol, tmpdir):
query = docs_to_index[100]
port = random_port()
with InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir)).serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -64,16 +58,14 @@ def test_inmemory_vectordb_single_query(docs_to_index, limit, replicas, shards,
assert resp.scores[0] > 0.99 # some precision issues, should be 1.0
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
delete = MyDoc(id=query.id, text='', embedding=np.random.rand(128))
with InMemoryExactNNVectorDB[MyDoc]().serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -94,16 +86,14 @@ def test_inmemory_vectordb_delete(docs_to_index, replicas, shards, protocol, tmp
assert resp.text != resp.matches[0].text
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[0]
port = random_port()
update = MyDoc(id=query.id, text=query.text + '_changed', embedding=query.embedding)
with InMemoryExactNNVectorDB[MyDoc]().serve(workspace=str(tmpdir), replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(inputs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -123,16 +113,14 @@ def test_inmemory_vectordb_udpate_text(docs_to_index, replicas, shards, protocol
assert resp.matches[0].text == resp.text + '_changed'
@pytest.mark.timeout(180)
@pytest.mark.parametrize('shards', [1, 2])
@pytest.mark.parametrize('replicas', [1, 3])
@pytest.mark.parametrize('protocol', ['grpc', 'http', 'websocket'])
def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tmpdir):
query = docs_to_index[:100]
port = random_port()
with InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir)).serve(replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as db:
db.index(docs=docs_to_index)
if replicas > 1:
time.sleep(2)
@ -144,8 +132,8 @@ def test_inmemory_vectordb_restore(docs_to_index, replicas, shards, protocol, tm
assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1
with InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir)).serve(replicas=replicas, shards=shards, port=port,
protocol=protocol, timeout_ready=10000) as new_db:
with InMemoryExactNNVectorDB[MyDoc].serve(workspace=str(tmpdir), replicas=replicas, shards=shards,
protocol=protocol) as new_db:
time.sleep(2)
resp = new_db.search(docs=query)
assert len(resp) == len(query)

View File

@ -169,20 +169,4 @@ def test_hnswlib_vectordb_restore(docs_to_index, tmpdir):
assert len(res.matches) == 10
# assert res.id == res.matches[0].id
# assert res.text == res.matches[0].text
# assert res.scores[0] < 0.001 # some precision issues, should be 0
def test_hnswlib_num_dos(tmpdir):
db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[MyDoc](doc_list))
x = db.num_docs()
assert x['num_docs'] == 1000
def test_hnswlib_query_id(tmpdir):
db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(id='test_1',text=f'test', embedding=np.random.rand(128)) ]
db.index(inputs=DocList[MyDoc](doc_list))
queryobjtest1 = db.get_by_id('test_1')
queryobjtest2 = db.get_by_id('test_2')
assert queryobjtest2 is None
assert queryobjtest1.id == 'test_1'
# assert res.scores[0] < 0.001 # some precision issues, should be 0

View File

@ -172,19 +172,3 @@ def test_inmemory_vectordb_restore(docs_to_index, tmpdir):
assert res.id == res.matches[0].id
assert res.text == res.matches[0].text
assert res.scores[0] > 0.99 # some precision issues, should be 1
def test_inmemory_num_dos(tmpdir):
db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[MyDoc](doc_list))
x = db.num_docs()
assert x['num_docs'] == 1000
def test_inmemory_query_id(tmpdir):
db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(id='test_1',text=f'test', embedding=np.random.rand(128)) ]
db.index(inputs=DocList[MyDoc](doc_list))
queryobjtest1 = db.get_by_id('test_1')
queryobjtest2 = db.get_by_id('test_2')
assert queryobjtest2 is None
assert queryobjtest1.id == 'test_1'

View File

@ -1,19 +1,4 @@
def _ignore_warnings():
import logging
import warnings
__version__ = '0.0.1'
logging.captureWarnings(True)
warnings.filterwarnings(
"ignore",
category=DeprecationWarning,
message="Deprecated call to `pkg_resources.declare_namespace('google')`.",
)
_ignore_warnings()
__version__ = '0.0.22'
from vectordb.client import Client
from vectordb.db.hnsw_vectordb import HNSWVectorDB
from vectordb.db.inmemory_exact_vectordb import InMemoryExactNNVectorDB
from vectordb.db.hnsw_vectordb import HNSWVectorDB

View File

@ -11,7 +11,7 @@ def vectordb():
pass
@vectordb.command(help='Deploy a vectorDB db to Jina AI Cloud')
@vectordb.command(help='Deploy a vectorDB app to Jina AI Cloud')
@click.option(
'--db',
'--app',
@ -39,16 +39,14 @@ def vectordb():
)
def deploy(db, protocol, shards):
definition_file, _, obj_name = db.partition(":")
if not definition_file.endswith('.py'):
definition_file = f'{definition_file}.py'
protocol = protocol.split(',')
VectorDB.deploy(protocol=protocol,
VectorDB.deploy(protoocl=protocol,
shards=shards,
definition_file=definition_file,
obj_name=obj_name)
@vectordb.command(help='Locally serve a vectorDB db')
@vectordb.command(help='Deploy a vectorDB app to Jina AI Cloud')
@click.option(
'--db',
'--app',

View File

@ -1 +0,0 @@
from vectordb.client.client import Client

View File

@ -52,10 +52,7 @@ class VectorDB(Generic[TSchema]):
self._workspace = kwargs['work_dir']
if 'workspace' in kwargs:
self._workspace = kwargs['workspace']
self._uses_with = {}
self._uses_with.update(**kwargs)
if 'workspace' in self._uses_with:
self._uses_with.pop('workspace')
self._uses_with = kwargs
kwargs['requests'] = REQUESTS_MAP
kwargs['runtime_args'] = {'workspace': self._workspace}
self._executor = self._executor_cls(*args, **kwargs)
@ -75,6 +72,14 @@ class VectorDB(Generic[TSchema]):
obj_name: Optional[str] = None,
**kwargs):
from jina import Deployment, Flow
is_instance = False
uses_with = uses_with or {}
if isinstance(cls, VectorDB):
is_instance = True
uses_with = uses_with.update(**cls._uses_with)
if is_instance:
workspace = workspace or cls._workspace
replicas = replicas or 1
shards = shards or 1
protocol = protocol or 'grpc'
@ -121,9 +126,8 @@ class VectorDB(Generic[TSchema]):
# here we would need to push the EXECUTOR TO HUBBLE AND CHANGE THE USES
assert definition_file is not None, 'Trying to create a Jina Object for Deployment without the file where the vectordb object/class is defined'
assert obj_name is not None, 'Trying to create a Jina Object for Deployment without the name of the vectordb object/class to deploy'
uses = f'{push_vectordb_to_hubble(vectordb_name=obj_name, definition_file_path=definition_file)}'
uses = f'jinaai+docker://{push_vectordb_to_hubble(vectordb_name=obj_name, definition_file_path=definition_file)}'
use_deployment = False
port = 8080
if 'websocket' in protocol_list: # websocket not supported for Deployment
use_deployment = False
@ -144,43 +148,39 @@ class VectorDB(Generic[TSchema]):
workspace=workspace,
polling=polling, **kwargs)
else:
jina_object = Flow(port=port, protocol=protocol, env=['JINA_LOG_LEVEL=DEBUG'], **kwargs).add(name='indexer',
uses=uses,
uses_with=uses_with,
shards=shards,
replicas=replicas,
stateful=stateful,
peer_ports=peer_ports,
polling=polling,
workspace=workspace)
jina_object = Flow(port=port, protocol=protocol, **kwargs).add(name='indexer',
uses=uses,
uses_with=uses_with,
shards=shards,
replicas=replicas,
stateful=stateful,
peer_ports=peer_ports,
polling=polling,
workspace=workspace)
return jina_object
def serve(self,
@classmethod
def serve(cls,
*,
port: Optional[Union[str, List[str]]] = 8081,
protocol: Optional[Union[str, List[str]]] = None,
**kwargs):
protocol = protocol or 'grpc'
protocol_list = [p.lower() for p in protocol] if isinstance(protocol, list) else [protocol.lower()]
uses_with = kwargs.pop('uses_with', {})
uses_with.update(self._uses_with)
workspace = kwargs.pop('workspace', self._workspace)
ctxt_manager = self._get_jina_object(to_deploy=False, port=port, protocol=protocol, workspace=workspace,
uses_with=uses_with, **kwargs)
ctxt_manager = cls._get_jina_object(to_deploy=False, port=port, protocol=protocol, **kwargs)
port = port[0] if isinstance(port, list) else port
return Service(ctxt_manager, address=f'{protocol_list[0]}://0.0.0.0:{port}', schema=self._input_schema,
reverse_order=self.reverse_score_order)
return Service(ctxt_manager, address=f'{protocol_list[0]}://0.0.0.0:{port}', schema=cls._input_schema,
reverse_order=cls.reverse_score_order)
def deploy(self,
@classmethod
def deploy(cls,
**kwargs):
from tempfile import mkdtemp
import os
import yaml
from yaml.loader import SafeLoader
uses_with = kwargs.pop('uses_with', {})
uses_with.update(self._uses_with)
jina_obj = self._get_jina_object(to_deploy=True, uses_with=uses_with, **kwargs)
jina_obj = cls._get_jina_object(to_deploy=True, **kwargs)
tmpdir = mkdtemp()
jina_obj.save_config(os.path.join(tmpdir, 'flow.yml'))
@ -192,10 +192,7 @@ class VectorDB(Generic[TSchema]):
for executor in flow_dict['executors']:
executor['jcloud'] = executor_jcloud_config
import docarray
global_jcloud_config = {
'docarray': docarray.__version__,
'labels': {
'app': 'vectordb',
},
@ -218,6 +215,7 @@ class VectorDB(Generic[TSchema]):
flow_path = os.path.join(tmpdir, 'flow.yml')
with open(flow_path, 'w') as f:
yaml.safe_dump(flow_dict, f, sort_keys=False)
cloud_flow = CloudFlow(path=flow_path)
async def _deploy():
@ -227,17 +225,6 @@ class VectorDB(Generic[TSchema]):
ret = asyncio.run(_deploy())
return ret
def num_docs(self, **kwargs):
return self._executor.num_docs()
def get_by_id(self,info_id, **kwargs):
ret = None
try:
ret = self._executor.get_by_id(info_id)
except KeyError:
pass
return ret
@pass_kwargs_as_params
@unify_input_output
def index(self, docs: 'DocList[TSchema]', parameters: Optional[Dict] = None, **kwargs):

View File

@ -105,11 +105,8 @@ class HNSWLibIndexer(TypedExecutor):
return self.update(docs, *args, **kwargs)
def num_docs(self, **kwargs):
return {'num_docs': self._indexer.num_docs()}
def get_by_id(self,info_id,**kwargs):
return self._indexer[info_id]
return {'num_docs': self._index.num_docs()}
def snapshot(self, snapshot_dir):
# TODO: Maybe copy the work_dir to workspace if `handle` is False
raise NotImplementedError('Act as not implemented')

View File

@ -71,11 +71,8 @@ class InMemoryExactNNIndexer(TypedExecutor):
return self._index(docs)
def num_docs(self, *args, **kwargs):
return {'num_docs': self._indexer.num_docs()}
def get_by_id(self,info_id,**kwargs):
return self._indexer[info_id]
return {'num_docs': self._index.num_docs()}
def snapshot(self, snapshot_dir):
snapshot_file = f'{snapshot_dir}/index.bin'
self._indexer.persist(snapshot_file)

View File

@ -1,6 +1,5 @@
from jina import Executor
from jina.serve.executors import _FunctionWithSchema
from jina.serve.executors import __dry_run_endpoint__
from typing import TypeVar, Generic, Type, Optional, TYPE_CHECKING
from vectordb.utils.create_doc_type import create_output_doc_type
@ -16,6 +15,7 @@ OutputSchema = TypeVar('OutputSchema', bound='BaseDoc')
methods = ['/index', '/update', '/delete', '/search']
class TypedExecutor(Executor, Generic[InputSchema, OutputSchema]):
# the BaseDoc that defines the schema of the store
# for subclasses this is filled automatically
@ -27,25 +27,12 @@ class TypedExecutor(Executor, Generic[InputSchema, OutputSchema]):
from docarray import DocList
self._num_replicas = getattr(self.runtime_args, 'replicas', 1)
for k, v in self._requests.items():
if k != __dry_run_endpoint__:
if k != '/search':
self._requests[k] = _FunctionWithSchema(fn=self._requests[k].fn,
is_generator=self._requests[k].is_generator,
is_batch_docs=self._requests[k].is_batch_docs,
is_singleton_doc=self._requests[k].is_singleton_doc,
parameters_is_pydantic_model=self._requests[k].parameters_is_pydantic_model,
parameters_model=self._requests[k].parameters_model,
request_schema=DocList[self._input_schema],
response_schema=DocList[self._input_schema])
else:
self._requests[k] = _FunctionWithSchema(fn=self._requests[k].fn,
is_generator=self._requests[k].is_generator,
is_batch_docs=self._requests[k].is_batch_docs,
is_singleton_doc=self._requests[k].is_singleton_doc,
parameters_is_pydantic_model=self._requests[k].parameters_is_pydantic_model,
parameters_model=self._requests[k].parameters_model,
request_schema=DocList[self._input_schema],
response_schema=DocList[self._output_schema])
if k != '/search':
self._requests[k] = _FunctionWithSchema(self._requests[k].fn, DocList[self._input_schema],
DocList[self._input_schema])
else:
self._requests[k] = _FunctionWithSchema(self._requests[k].fn, DocList[self._input_schema],
DocList[self._output_schema])
@property
def handle_persistence(self):

View File

@ -15,8 +15,7 @@ def create_output_doc_type(input_doc_type: Type['BaseDoc']):
return create_model(
input_doc_type.__name__ + 'WithMatchesAndScores',
__base__=input_doc_type,
# NOTE: With pydantic>=2, __validators__ does not exist
__validators__=getattr(input_doc_type, "__validators__", None),
__validators__=input_doc_type.__validators__,
matches=(DocList[input_doc_type], []),
scores=(List[float], [])
)

View File

@ -11,7 +11,7 @@ from pathlib import Path
__resources_path__ = os.path.join(
Path(os.path.dirname(sys.modules['vectordb'].__file__)).parent.absolute(), 'resources'
)
API_URL = "https://apihubble.jina.ai/v2/executor/getMeta?id={id}&tag={tag}"
class EnvironmentVarCtxtManager:
"""a class to wrap env vars"""
@ -40,25 +40,6 @@ class EnvironmentVarCtxtManager:
os.unsetenv(key)
def get_uri(id: str, tag: str):
import requests
from hubble import Auth
headers = {"Authorization": f"token {Auth.get_auth_token()}"}
response = requests.get(API_URL.format(id=id, tag=tag), headers=headers)
if response.status_code != 200:
raise Exception(f"Request failed with status code {response.status_code}")
response_json = response.json()
if response_json is None:
raise Exception(f'Could not find image with id {id} and tag {tag}')
image_name = response_json['data']['name']
user_name = response_json['meta']['owner']['name']
return f'jinaai+docker://{user_name}/{image_name}:{tag}'
def get_random_tag():
return 't-' + uuid.uuid4().hex[:5]
@ -129,6 +110,4 @@ def push_vectordb_to_hubble(
with open(os.path.join(tmpdir, 'config.yml'), mode='w', encoding='utf-8') as f:
f.write(content)
executor_id = _push_to_hubble(tmpdir, image_name, tag, True, False)
id, tag = executor_id.split(':')
return get_uri(id, tag)
return _push_to_hubble(tmpdir, image_name, tag, True, False)