Add LoadVectors().

This commit is contained in:
Haifeng Luo 2022-04-10 10:59:17 +08:00
parent 88294ff9ea
commit 8ef53c3353
4 changed files with 91 additions and 0 deletions

View File

@ -0,0 +1,42 @@
package object
import (
"fmt"
"github.com/openbrain/openbrain/util"
)
// LoadVectors reads the vectorset's space-separated data file and stores at
// most the first 100 vectors found in it in vectorset.Vectors.
//
// The file path is obtained from util.GetUploadFilePath applied to pathPrefix
// followed by vectorset.FileName. The first row of the file is skipped
// (presumably a header line; confirm against the file format). In every
// following row the first token becomes the vector's Name and the remaining
// tokens are parsed with util.ParseFloat into its Data.
//
// The method panics when a row's number of components differs from
// vectorset.Dimension.
func (vectorset *Vectorset) LoadVectors(pathPrefix string) {
	// Upper bound on how many vectors are kept as examples.
	const maxExamples = 100

	var lines [][]string
	util.LoadSpaceFile(util.GetUploadFilePath(fmt.Sprintf("%s%s", pathPrefix, vectorset.FileName)), &lines)

	examples := make([]*Vector, 0, maxExamples)
	// Start at index 1 to skip the first row; stop once enough examples exist.
	for idx := 1; idx < len(lines) && len(examples) < maxExamples; idx++ {
		tokens := lines[idx]
		fields := tokens[1:]

		components := make([]float64, len(fields))
		for j, field := range fields {
			components[j] = util.ParseFloat(field)
		}

		if len(components) != vectorset.Dimension {
			panic(fmt.Errorf("invalid vector data length: %d, expected = %d", len(components), vectorset.Dimension))
		}

		examples = append(examples, &Vector{
			Name: tokens[0],
			Data: components,
		})
	}

	vectorset.Vectors = examples
}

View File

@ -0,0 +1,11 @@
package object
import "testing"
// TestUpdateVectorsetVectors loads the example vectors of the "百度百科"
// vectorset owned by "admin" from the tmpFiles directory and writes the
// updated vectorset back through UpdateVectorset.
//
// It makes no assertions; it acts as a manually run maintenance task.
func TestUpdateVectorsetVectors(t *testing.T) {
	InitConfig()

	vs := getVectorset("admin", "百度百科")
	// The prefix is relative to the path produced by util.GetUploadFilePath.
	vs.LoadVectors("../../tmpFiles/")

	UpdateVectorset(vs.GetId(), vs)
}

34
util/file.go Normal file
View File

@ -0,0 +1,34 @@
package util
import (
"bufio"
"os"
"strings"
)
// LoadSpaceFile reads the text file at path line by line, splits every line
// on single space characters and appends the resulting token slice to *rows.
//
// Leading and trailing spaces are trimmed from each line before splitting.
// Consecutive spaces inside a line are not collapsed, so they produce empty
// tokens, and an empty line yields a slice holding one empty token.
//
// Lines approaching 8 MiB in length are supported. The function panics if the
// file cannot be opened or if scanning fails, for example because a line
// exceeds that limit.
func LoadSpaceFile(path string, rows *[][]string) {
	file, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Raise the scanner's per-line limit far above its 64 KiB default, but
	// start from a small buffer and let the scanner grow it on demand instead
	// of allocating the full 8 MiB on every call.
	const (
		initialCapacity = 64 * 1024
		maxCapacity     = 8 * 1024 * 1024
	)
	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialCapacity), maxCapacity)

	for scanner.Scan() {
		line := strings.Trim(scanner.Text(), " ")
		*rows = append(*rows, strings.Split(line, " "))
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}

View File

@ -9,3 +9,7 @@ func GetUploadXlsxPath(fileId string) string {
// GetUploadCsvPath returns the relative path of the uploaded CSV file for
// fileId: the "tmpFiles/" directory, the id, and a ".csv" extension.
func GetUploadCsvPath(fileId string) string {
	const layout = "tmpFiles/%s.csv"
	return fmt.Sprintf(layout, fileId)
}
// GetUploadFilePath returns fileId prefixed with the "tmpFiles/" directory.
// No extension is appended and fileId is used verbatim, so it may itself
// contain path separators.
func GetUploadFilePath(fileId string) string {
	const layout = "tmpFiles/%s"
	return fmt.Sprintf(layout, fileId)
}