diff --git a/object/vectorset_tsne_test.go b/object/vectorset_tsne_test.go index 2f79672..b8c9289 100644 --- a/object/vectorset_tsne_test.go +++ b/object/vectorset_tsne_test.go @@ -8,15 +8,15 @@ import ( func TestDoVectorsetTsne(t *testing.T) { InitConfig() - dimension := 5 + dimension := 50 - //vectorset := getVectorset("admin", "百度百科") - vectorset := getVectorset("admin", "wordVector_utf-8") + vectorset := getVectorset("admin", "百度百科") + //vectorset := getVectorset("admin", "wordVector_utf-8") vectorset.LoadVectors("../../tmpFiles/") vectorset.DoTsne(dimension) vectorset.Name = fmt.Sprintf("%s_Dim_%d", vectorset.Name, dimension) - vectorset.FileName = fmt.Sprintf("%s.ob", vectorset.FileName) + vectorset.FileName = fmt.Sprintf("%s_Dim_%d.csv", vectorset.FileName, dimension) vectorset.FileSize = "" vectorset.Dimension = dimension vectorset.WriteVectors("../../tmpFiles/") diff --git a/object/vectorset_upload.go b/object/vectorset_upload.go index 607677c..318b89b 100644 --- a/object/vectorset_upload.go +++ b/object/vectorset_upload.go @@ -13,7 +13,11 @@ func (vectorset *Vectorset) LoadVectors(pathPrefix string) { var nameArray []string var dataArray [][]float64 if strings.HasSuffix(vectorset.FileName, ".csv") { - nameArray, dataArray = util.LoadVectorFileByCsv(path) + if strings.Contains(vectorset.FileName, "_Dim_") { + nameArray, dataArray = util.LoadVectorFileByCsv2(path) + } else { + nameArray, dataArray = util.LoadVectorFileByCsv(path) + } } else { nameArray, dataArray = util.LoadVectorFileBySpace(path) } @@ -45,6 +49,7 @@ func (vectorset *Vectorset) WriteVectors(pathPrefix string) { rows := [][]string{} for _, vector := range vectorset.AllVectors { row := util.FloatsToStrings(vector.Data) + row = append([]string{vector.Name}, row...) rows = append(rows, row) } diff --git a/util/array.go b/util/array.go index 3104acf..4b5a676 100644 --- a/util/array.go +++ b/util/array.go @@ -9,3 +9,11 @@ func FloatsToStrings(floatArray []float64) []string { } return res } + +func StringsToFloats(stringArray []string) []float64 { + res := []float64{} + for _, s := range stringArray { + res = append(res, ParseFloat(s)) + } + return res +} diff --git a/util/file.go b/util/file.go index f9e7015..e4b6fc2 100644 --- a/util/file.go +++ b/util/file.go @@ -49,6 +49,27 @@ func LoadVectorFileByCsv(path string) ([]string, [][]float64) { return nameArray, dataArray } +func LoadVectorFileByCsv2(path string) ([]string, [][]float64) { + nameArray := []string{} + dataArray := [][]float64{} + + file, err := os.Open(path) + if err != nil { + panic(err) + } + defer file.Close() + + rows := [][]string{} + LoadCsvFile(path, &rows) + + for _, row := range rows { + nameArray = append(nameArray, row[0]) + dataArray = append(dataArray, StringsToFloats(row[1:])) + } + + return nameArray, dataArray +} + func LoadVectorFileBySpace(path string) ([]string, [][]float64) { nameArray := []string{} dataArray := [][]float64{}