Initial support to index json files (WIP PoC)

This commit is contained in:
pancake 2023-12-06 15:54:27 +01:00
parent 49dbbf28ae
commit 95a2abe010
1 changed file with 21 additions and 2 deletions

View File

@@ -109,11 +109,27 @@ def json2md(text):
if isinstance(obj, list): if isinstance(obj, list):
for a in obj: for a in obj:
res += jsonwalk(a) res += jsonwalk(a)
elif isinstance(obj, dict):
if "file" in obj:
pass
# elif "impactType" in obj and obj["impactType"] == "pass":
# pass
elif "ts" in obj:
pass
else:
for k in obj.keys():
res += "## " + k + "\n"
lst = json.dumps(obj[k]).replace("{","").replace("}", "\n").replace("\"", "").replace(",", "*").split("\n")
res += "\n".join(list(filter(lambda k: 'crc64' not in k and 'file' not in k and 'from_text' not in k and 'backtrace' not in k, lst)))
res += "\n\n"
else: else:
res += jsonwalk(a) res += str(obj) # jsonwalk(obj)
return res return res
doc = json.loads(text) doc = json.loads(text)
res = jsonwalk(doc) res = jsonwalk(doc)
# print("==========")
# print(res)
# print("==========")
return res return res
def md2txt(text): def md2txt(text):
@@ -182,10 +198,13 @@ def filter_line(line):
return words return words
def smart_slurp(file): def smart_slurp(file):
print("smart" + file)
# print(f"slurp: {file}") # print(f"slurp: {file}")
text = slurp(file) text = slurp(file)
if file.endswith("r2ai.history"): if file.endswith("r2ai.history"):
text = hist2txt(text) text = hist2txt(text)
elif file.endswith(".json"):
text = md2txt(json2md(text))
elif file.endswith(".md"): elif file.endswith(".md"):
text = md2txt(text) text = md2txt(text)
return text return text
@@ -246,7 +265,7 @@ def vectordb_init():
def vectordb_search(text, keywords, source_files, use_mastodon, use_debug): def vectordb_search(text, keywords, source_files, use_mastodon, use_debug):
global have_vectordb, vectordb_instance global have_vectordb, vectordb_instance
if not have_vectordb: if have_vectordb == False:
return [] return []
if have_vectordb == True and vectordb_instance is not None: if have_vectordb == True and vectordb_instance is not None:
return vectordb_search2(text, keywords, use_mastodon) return vectordb_search2(text, keywords, use_mastodon)