I have a folder structure with some epubs and json files in the bottom-most folders (not counting the .ts folders). I’m exporting tags from the json files to TagSpaces by creating a .ts folder containing some other json files. I’ve already processed part of the files, and now I want to find the leaf folders that don’t have a .ts folder in their path, so that I can find the remaining files without having to process the others twice.
I want to process the files in the directories as I find them, instead of first getting a list of directories and then looping through it. In the example below I return the list of directories only to be able to test it.
So for this example I only want to do something for the folder t5:
test
├── t1
│   ├── t2
│   │   └── t5
│   └── t3
│       └── .ts
└── .ts
    └── t4
This is what I’ve tried:
import os
import shutil
from typing import List
def process_files_in_leaf_subdirectories(dir: str) -> List[str]:
    """Return the leaf directories under *dir* that are not inside a ``.ts`` folder.

    A directory is a leaf when it contains no subdirectories at all. A
    directory that holds a ``.ts`` subfolder therefore does not count as a
    leaf and is not returned.

    Args:
        dir: Root directory to walk.

    Returns:
        Paths of the matching leaf directories, as produced by ``os.walk``.
    """
    dirs = []
    for root, subdirs, _filenames in os.walk(dir):
        # Compare whole path components: a plain substring test would also
        # reject unrelated names such as "notes.tsx" or "my.tsfiles".
        if '.ts' in os.path.normpath(root).split(os.sep):
            subdirs[:] = []  # nothing below a .ts folder is of interest
            continue
        # Decide leaf-ness before pruning, so a folder whose only child is
        # ".ts" is still treated as a non-leaf.
        is_leaf = not subdirs
        # Pruning in place stops os.walk from descending into .ts trees.
        subdirs[:] = [name for name in subdirs if name != '.ts']
        if is_leaf:
            dirs.append(root)
    return dirs
def test_process_files_in_leaf_subdirectories():
    """Check that only the unprocessed leaf folder ``t5`` is reported.

    The fixture tree is built inside a throw-away temporary directory, so
    it is removed even when the assertion fails and never touches an
    existing ``tmp`` folder in the working directory.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as base:
        os.makedirs(os.path.join(base, 't1', 't2', 't5'))
        os.makedirs(os.path.join(base, 't1', 't3', '.ts'))
        os.makedirs(os.path.join(base, '.ts', 't4'))
        # Build the expected path with os.path.join so the comparison also
        # holds on platforms whose separator is not "/".
        expected = [os.path.join(base, 't1', 't2', 't5')]
        assert process_files_in_leaf_subdirectories(base) == expected
The next example works fine but it gets the list of directories instead of processing the files as they are found:
import os
import shutil
from pathlib import Path
from typing import Iterator, List
def process_files_in_leaf_dir(leaves: List[Path]) -> List[Path]:
    """Collect the ``*.json`` files found directly inside each given directory.

    Args:
        leaves: Directories to scan. They are only iterated once, so a
            generator of paths works as well as a list.

    Returns:
        The matching files as ``Path`` objects, grouped in the order the
        directories were given. The order within one directory is whatever
        ``Path.glob`` produces.
    """
    files: List[Path] = []
    for leaf in leaves:
        # The glob is not recursive: only files directly in the leaf match.
        files.extend(leaf.glob("*.json"))
    return files
def find_leaf_dirs(root_path: Path) -> Iterator[Path]:
    """Lazily yield the leaf directories below *root_path*, skipping hidden trees.

    A directory is a leaf when it has no subdirectories at all. Hidden
    subdirectories (names starting with ".") still count as children, so a
    folder that contains only a ``.ts`` subfolder is not a leaf and is
    never yielded. Hidden subdirectories are not descended into.

    Args:
        root_path: Directory at which the search starts.

    Yields:
        Each leaf directory, as soon as it is found.
    """
    # Only directories matter for deciding whether this is a leaf.
    child_dirs = [path for path in root_path.iterdir() if path.is_dir()]
    # No child directory at all: this is a leaf.
    if not child_dirs:
        yield root_path
        return
    for path in child_dirs:
        # Do not descend into hidden directories such as ".ts".
        if path.name.startswith("."):
            continue
        # Recurse and pass the results through lazily.
        yield from find_leaf_dirs(path)
def test_process_files_in_leaf_dir():
    """Check that only the json file in the unprocessed leaf ``t5`` is found.

    The fixture tree is built inside a throw-away temporary directory, so
    it is removed even when the assertion fails and never touches an
    existing ``tmp`` folder in the working directory.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        base = Path(tmp)
        (base / 't1' / 't2' / 't5').mkdir(parents=True)
        (base / 't1' / 't3' / '.ts').mkdir(parents=True)
        (base / '.ts' / 't4').mkdir(parents=True)
        # One json file per folder; only the one in t5 should be picked up.
        for folder in ('t1/t2/t5', 't1/t3', 't1/t3/.ts', '.ts/t4'):
            (base / folder / 'test.json').touch()
        # Pass the generator straight through so each leaf is processed as
        # soon as it is found, without building a list of directories first.
        leaves = find_leaf_dirs(base)
        expected = [base / 't1' / 't2' / 't5' / 'test.json']
        assert process_files_in_leaf_dir(leaves) == expected