From 8ef53c3353635f5aa4a36289a609fef20dbe48f2 Mon Sep 17 00:00:00 2001
From: Haifeng Luo
Date: Sun, 10 Apr 2022 10:59:17 +0800
Subject: [PATCH] Add LoadVectors().

---
Revised after review: LoadVectors now reads only the rows it keeps. The
index lines below still name the blobs of the original commit.

 object/vectorset_upload.go      | 49 ++++++++++++++++++++++++++++
 object/vectorset_upload_test.go | 14 ++++++++
 util/file.go                    | 57 +++++++++++++++++++++++++++++++++
 util/setting.go                 |  6 ++++
 4 files changed, 126 insertions(+)
 create mode 100644 object/vectorset_upload.go
 create mode 100644 object/vectorset_upload_test.go
 create mode 100644 util/file.go

diff --git a/object/vectorset_upload.go b/object/vectorset_upload.go
new file mode 100644
index 0000000..8a0ee7f
--- /dev/null
+++ b/object/vectorset_upload.go
@@ -0,0 +1,49 @@
+package object
+
+import (
+	"fmt"
+
+	"github.com/openbrain/openbrain/util"
+)
+
+// maxExampleVectors is the number of vectors LoadVectors keeps as a preview.
+const maxExampleVectors = 100
+
+// LoadVectors fills vectorset.Vectors with the first maxExampleVectors vectors
+// of the uploaded file util.GetUploadFilePath(pathPrefix + vectorset.FileName).
+//
+// The first non-blank line is skipped (presumably the header of a word2vec
+// style text file). Every following line must hold a name followed by exactly
+// vectorset.Dimension space-separated values. The method panics if the file
+// cannot be read or if a line holds a different number of values.
+func (vectorset *Vectorset) LoadVectors(pathPrefix string) {
+	path := util.GetUploadFilePath(fmt.Sprintf("%s%s", pathPrefix, vectorset.FileName))
+
+	// Only the preview is needed, so stop after the header plus
+	// maxExampleVectors rows instead of loading the whole file into memory.
+	rows := [][]string{}
+	util.LoadSpaceFileLimit(path, maxExampleVectors+1, &rows)
+
+	exampleVectors := []*Vector{}
+	for i, row := range rows {
+		if i == 0 {
+			// Header line, not a vector.
+			continue
+		}
+
+		vectorData := make([]float64, 0, len(row)-1)
+		for _, token := range row[1:] {
+			vectorData = append(vectorData, util.ParseFloat(token))
+		}
+		if len(vectorData) != vectorset.Dimension {
+			panic(fmt.Errorf("invalid vector data length: %d, expected = %d", len(vectorData), vectorset.Dimension))
+		}
+
+		exampleVectors = append(exampleVectors, &Vector{
+			Name: row[0],
+			Data: vectorData,
+		})
+	}
+
+	vectorset.Vectors = exampleVectors
+}
diff --git a/object/vectorset_upload_test.go b/object/vectorset_upload_test.go
new file mode 100644
index 0000000..01f702a
--- /dev/null
+++ b/object/vectorset_upload_test.go
@@ -0,0 +1,14 @@
+package object
+
+import "testing"
+
+// TestUpdateVectorsetVectors loads the preview vectors of one vectorset and
+// passes the result to UpdateVectorset. It makes no assertions.
+// NOTE(review): appears to need a configured database and the uploaded file.
+func TestUpdateVectorsetVectors(t *testing.T) {
+	InitConfig()
+
+	vectorset := getVectorset("admin", "百度百科")
+	vectorset.LoadVectors("../../tmpFiles/")
+	UpdateVectorset(vectorset.GetId(), vectorset)
+}
diff --git a/util/file.go b/util/file.go
new file mode 100644
index 0000000..30e723e
--- /dev/null
+++ b/util/file.go
@@ -0,0 +1,57 @@
+package util
+
+import (
+	"bufio"
+	"os"
+	"strings"
+)
+
+// maxLineBytes bounds the length of a single line, raising bufio.Scanner's
+// default limit of 64 KiB.
+const maxLineBytes = 8 * 1024 * 1024
+
+// LoadSpaceFile reads the whole text file at path and appends one row of
+// space-separated tokens per non-blank line to *rows. It panics if the file
+// cannot be opened or read.
+func LoadSpaceFile(path string, rows *[][]string) {
+	LoadSpaceFileLimit(path, 0, rows)
+}
+
+// LoadSpaceFileLimit works like LoadSpaceFile but stops reading once limit
+// rows have been appended. A limit of zero or less means no limit.
+//
+// Consecutive spaces count as one separator, so leading, trailing or doubled
+// spaces never produce empty tokens, and lines without any token are skipped.
+func LoadSpaceFileLimit(path string, limit int, rows *[][]string) {
+	file, err := os.Open(path)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	// Start small and let the scanner grow the buffer up to maxLineBytes.
+	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
+
+	// Split on ASCII spaces only: a token may itself contain other Unicode
+	// whitespace, which strings.Fields would treat as a separator.
+	isSpace := func(r rune) bool { return r == ' ' }
+
+	appended := 0
+	for scanner.Scan() {
+		tokens := strings.FieldsFunc(scanner.Text(), isSpace)
+		if len(tokens) == 0 {
+			continue
+		}
+		*rows = append(*rows, tokens)
+
+		appended++
+		if appended == limit {
+			break
+		}
+	}
+
+	if err = scanner.Err(); err != nil {
+		panic(err)
+	}
+}
diff --git a/util/setting.go b/util/setting.go
index f8dd62f..ae15f00 100644
--- a/util/setting.go
+++ b/util/setting.go
@@ -9,3 +9,9 @@ func GetUploadXlsxPath(fileId string) string {
 func GetUploadCsvPath(fileId string) string {
 	return fmt.Sprintf("tmpFiles/%s.csv", fileId)
 }
+
+// GetUploadFilePath returns the path of the uploaded file fileId inside the
+// tmpFiles directory, keeping fileId unchanged (no extension is appended).
+func GetUploadFilePath(fileId string) string {
+	return fmt.Sprintf("tmpFiles/%s", fileId)
+}