fix: skip audio and video files during directory scan in RAG Service. (#1509)
This commit is contained in:
@@ -660,14 +660,33 @@ def scan_directory(directory: Path) -> list[str]:
|
|||||||
"""Scan directory and return a list of matched files."""
|
"""Scan directory and return a list of matched files."""
|
||||||
spec = get_pathspec(directory)
|
spec = get_pathspec(directory)
|
||||||
|
|
||||||
|
audio_video_exts = [
|
||||||
|
".mp3",
|
||||||
|
".wav",
|
||||||
|
".ogg",
|
||||||
|
".flac",
|
||||||
|
".aac",
|
||||||
|
".m4a",
|
||||||
|
".wma",
|
||||||
|
".mp4",
|
||||||
|
".avi",
|
||||||
|
".mov",
|
||||||
|
".wmv",
|
||||||
|
".flv",
|
||||||
|
".mkv",
|
||||||
|
".webm",
|
||||||
|
]
|
||||||
|
|
||||||
matched_files = []
|
matched_files = []
|
||||||
|
|
||||||
for root, _, files in os.walk(directory):
|
for root, _, files in os.walk(directory):
|
||||||
file_paths = [str(Path(root) / file) for file in files]
|
file_paths = [str(Path(root) / file) for file in files]
|
||||||
if not spec:
|
|
||||||
matched_files.extend(file_paths)
|
|
||||||
continue
|
|
||||||
for file in file_paths:
|
for file in file_paths:
|
||||||
|
file_ext = Path(file).suffix.lower()
|
||||||
|
if file_ext in audio_video_exts:
|
||||||
|
logger.info("Skipping audio/video file: %s", file)
|
||||||
|
continue
|
||||||
|
|
||||||
if spec and spec.match_file(os.path.relpath(file, directory)):
|
if spec and spec.match_file(os.path.relpath(file, directory)):
|
||||||
logger.info("Ignoring file: %s", file)
|
logger.info("Ignoring file: %s", file)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user