Add LoadVectorFileByCsv2().

This commit is contained in:
Haifeng Luo 2022-04-12 23:22:46 +08:00
parent 3e3c25c3ba
commit 17a8234773
4 changed files with 39 additions and 5 deletions

View File

@ -8,15 +8,15 @@ import (
func TestDoVectorsetTsne(t *testing.T) { func TestDoVectorsetTsne(t *testing.T) {
InitConfig() InitConfig()
dimension := 5 dimension := 50
//vectorset := getVectorset("admin", "百度百科") vectorset := getVectorset("admin", "百度百科")
vectorset := getVectorset("admin", "wordVector_utf-8") //vectorset := getVectorset("admin", "wordVector_utf-8")
vectorset.LoadVectors("../../tmpFiles/") vectorset.LoadVectors("../../tmpFiles/")
vectorset.DoTsne(dimension) vectorset.DoTsne(dimension)
vectorset.Name = fmt.Sprintf("%s_Dim_%d", vectorset.Name, dimension) vectorset.Name = fmt.Sprintf("%s_Dim_%d", vectorset.Name, dimension)
vectorset.FileName = fmt.Sprintf("%s.ob", vectorset.FileName) vectorset.FileName = fmt.Sprintf("%s_Dim_%d.csv", vectorset.FileName, dimension)
vectorset.FileSize = "" vectorset.FileSize = ""
vectorset.Dimension = dimension vectorset.Dimension = dimension
vectorset.WriteVectors("../../tmpFiles/") vectorset.WriteVectors("../../tmpFiles/")

View File

@ -13,7 +13,11 @@ func (vectorset *Vectorset) LoadVectors(pathPrefix string) {
var nameArray []string var nameArray []string
var dataArray [][]float64 var dataArray [][]float64
if strings.HasSuffix(vectorset.FileName, ".csv") { if strings.HasSuffix(vectorset.FileName, ".csv") {
if strings.Contains(vectorset.FileName, "_Dim_") {
nameArray, dataArray = util.LoadVectorFileByCsv2(path)
} else {
nameArray, dataArray = util.LoadVectorFileByCsv(path) nameArray, dataArray = util.LoadVectorFileByCsv(path)
}
} else { } else {
nameArray, dataArray = util.LoadVectorFileBySpace(path) nameArray, dataArray = util.LoadVectorFileBySpace(path)
} }
@ -45,6 +49,7 @@ func (vectorset *Vectorset) WriteVectors(pathPrefix string) {
rows := [][]string{} rows := [][]string{}
for _, vector := range vectorset.AllVectors { for _, vector := range vectorset.AllVectors {
row := util.FloatsToStrings(vector.Data) row := util.FloatsToStrings(vector.Data)
row = append([]string{vector.Name}, row...)
rows = append(rows, row) rows = append(rows, row)
} }

View File

@ -9,3 +9,11 @@ func FloatsToStrings(floatArray []float64) []string {
} }
return res return res
} }
// StringsToFloats converts every element of stringArray with ParseFloat and
// returns the results in the same order as the input.
//
// The returned slice always has the same length as stringArray and is never
// nil, even when stringArray is nil or empty.
func StringsToFloats(stringArray []string) []float64 {
	floats := make([]float64, len(stringArray))
	for i := range stringArray {
		floats[i] = ParseFloat(stringArray[i])
	}
	return floats
}

View File

@ -49,6 +49,27 @@ func LoadVectorFileByCsv(path string) ([]string, [][]float64) {
return nameArray, dataArray return nameArray, dataArray
} }
// LoadVectorFileByCsv2 loads a CSV vector file in which the first column of
// each row is the vector name and the remaining columns are its components.
//
// It returns two parallel slices: the names, and the component values
// converted with StringsToFloats. Rows that contain no fields are skipped.
// It panics if path cannot be opened.
func LoadVectorFileByCsv2(path string) ([]string, [][]float64) {
	// Fail fast with the OS error when the file is not readable. The handle
	// is never read here; LoadCsvFile is given the path, not this handle.
	// NOTE(review): if LoadCsvFile already reports open errors, this probe
	// is redundant and can be dropped — confirm against its implementation.
	file, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	rows := [][]string{}
	LoadCsvFile(path, &rows)

	// The row count is known at this point, so size the results up front.
	nameArray := make([]string, 0, len(rows))
	dataArray := make([][]float64, 0, len(rows))
	for _, row := range rows {
		if len(row) == 0 {
			// No name column to read; indexing row[0] would panic.
			continue
		}
		nameArray = append(nameArray, row[0])
		dataArray = append(dataArray, StringsToFloats(row[1:]))
	}
	return nameArray, dataArray
}
func LoadVectorFileBySpace(path string) ([]string, [][]float64) { func LoadVectorFileBySpace(path string) ([]string, [][]float64) {
nameArray := []string{} nameArray := []string{}
dataArray := [][]float64{} dataArray := [][]float64{}