35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
import os
|
|
import shutil
|
|
|
|
def find_all_files(path, suffix=".jpg"):
|
|
target_files = []
|
|
for cur_dir, _, files in os.walk(path, followlinks=True):
|
|
for f in files:
|
|
if f.endswith(suffix):
|
|
target_files.append(os.path.join(cur_dir, f))
|
|
print(f'find {len(target_files)} files...')
|
|
return target_files
|
|
|
|
all_files = find_all_files('archive')
|
|
os.makedirs("archive_split", exist_ok=True)
|
|
os.makedirs("archive_split/train", exist_ok=True)
|
|
os.makedirs("archive_split/valid", exist_ok=True)
|
|
os.makedirs("archive_split/test", exist_ok=True)
|
|
|
|
import random
|
|
random.seed(2023)
|
|
random.shuffle(all_files)
|
|
train = all_files[:8000]
|
|
valid = all_files[8000:8000+500]
|
|
test = all_files[8000+500:8000+500+1500]
|
|
|
|
print("building train")
|
|
for file in train:
|
|
shutil.move(file, os.path.join("archive_split/train", file.split("/")[-1]))
|
|
print("building valid")
|
|
for file in valid:
|
|
shutil.move(file, os.path.join("archive_split/valid", file.split("/")[-1]))
|
|
print("building test")
|
|
for file in test:
|
|
shutil.move(file, os.path.join("archive_split/test", file.split("/")[-1]))
|
|
print("done") |