feat: RAG service (#1220)
This commit is contained in:
33
py/rag-service/Dockerfile
Normal file
33
py/rag-service/Dockerfile
Normal file
@@ -0,0 +1,33 @@
|
||||
# Minimal Debian base image; Python itself is installed below via uv.
FROM debian:bookworm-slim

WORKDIR /app

# Build tooling for wheels with native extensions, curl to fetch the uv installer.
# apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/* \
    && curl -LsSf https://astral.sh/uv/install.sh | sh

# The uv installer places the binary under /root/.local/bin.
ENV PATH="/root/.local/bin:$PATH"

# Install a uv-managed Python 3.11 toolchain.
RUN uv python install 3.11

# Debug aid: record which Python versions uv sees in the build log.
RUN uv python list

ENV PATH="/root/.uv/python/3.11/bin:$PATH"

# Copy requirements first so the dependency layer caches independently of
# source-code changes.
COPY requirements.txt .

RUN uv venv --python 3.11

RUN uv pip install -r requirements.txt

# Unbuffered output for container logs; no .pyc files; default service port.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=8000

EXPOSE ${PORT}

COPY . .

# NOTE(review): CMD hard-codes 3 workers rather than deriving from PORT/env —
# confirm this is intentional for the target deployment.
CMD ["uv", "run", "fastapi", "run", "src/main.py", "--workers", "3"]
|
||||
162
py/rag-service/requirements.txt
Normal file
162
py/rag-service/requirements.txt
Normal file
@@ -0,0 +1,162 @@
|
||||
aiohappyeyeballs==2.4.6
|
||||
aiohttp==3.11.12
|
||||
aiosignal==1.3.2
|
||||
annotated-types==0.7.0
|
||||
anyio==4.8.0
|
||||
asgiref==3.8.1
|
||||
asttokens==3.0.0
|
||||
attrs==25.1.0
|
||||
backoff==2.2.1
|
||||
bcrypt==4.2.1
|
||||
beautifulsoup4==4.13.3
|
||||
build==1.2.2.post1
|
||||
cachetools==5.5.1
|
||||
certifi==2024.12.14
|
||||
charset-normalizer==3.4.1
|
||||
chroma-hnswlib==0.7.6
|
||||
chromadb==0.6.3
|
||||
click==8.1.8
|
||||
coloredlogs==15.0.1
|
||||
dataclasses-json==0.6.7
|
||||
decorator==5.1.1
|
||||
Deprecated==1.2.18
|
||||
dirtyjson==1.0.8
|
||||
distro==1.9.0
|
||||
dnspython==2.7.0
|
||||
durationpy==0.9
|
||||
email_validator==2.2.0
|
||||
executing==2.2.0
|
||||
fastapi==0.115.8
|
||||
fastapi-cli==0.0.7
|
||||
filelock==3.17.0
|
||||
filetype==1.2.0
|
||||
flatbuffers==25.1.24
|
||||
frozenlist==1.5.0
|
||||
fsspec==2025.2.0
|
||||
google-auth==2.38.0
|
||||
googleapis-common-protos==1.66.0
|
||||
greenlet==3.1.1
|
||||
grpcio==1.70.0
|
||||
h11==0.14.0
|
||||
httpcore==1.0.7
|
||||
httptools==0.6.4
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.28.1
|
||||
humanfriendly==10.0
|
||||
idna==3.10
|
||||
importlib_metadata==8.5.0
|
||||
importlib_resources==6.5.2
|
||||
ipython==8.32.0
|
||||
jedi==0.19.2
|
||||
Jinja2==3.1.5
|
||||
jiter==0.8.2
|
||||
joblib==1.4.2
|
||||
kubernetes==32.0.0
|
||||
llama-cloud==0.1.11
|
||||
llama-cloud-services==0.6.0
|
||||
llama-index==0.12.16
|
||||
llama-index-agent-openai==0.4.3
|
||||
llama-index-cli==0.4.0
|
||||
llama-index-core==0.12.16.post1
|
||||
llama-index-embeddings-openai==0.3.1
|
||||
llama-index-indices-managed-llama-cloud==0.6.4
|
||||
llama-index-llms-openai==0.3.18
|
||||
llama-index-multi-modal-llms-openai==0.4.3
|
||||
llama-index-program-openai==0.3.1
|
||||
llama-index-question-gen-openai==0.3.0
|
||||
llama-index-readers-file==0.4.4
|
||||
llama-index-readers-llama-parse==0.4.0
|
||||
llama-index-vector-stores-chroma==0.4.1
|
||||
llama-parse==0.6.0
|
||||
markdown-it-py==3.0.0
|
||||
markdownify==0.14.1
|
||||
MarkupSafe==3.0.2
|
||||
marshmallow==3.26.1
|
||||
matplotlib-inline==0.1.7
|
||||
mdurl==0.1.2
|
||||
mmh3==5.1.0
|
||||
monotonic==1.6
|
||||
mpmath==1.3.0
|
||||
multidict==6.1.0
|
||||
mypy-extensions==1.0.0
|
||||
nest-asyncio==1.6.0
|
||||
networkx==3.4.2
|
||||
nltk==3.9.1
|
||||
numpy==2.2.2
|
||||
oauthlib==3.2.2
|
||||
onnxruntime==1.20.1
|
||||
openai==1.61.1
|
||||
opentelemetry-api==1.30.0
|
||||
opentelemetry-exporter-otlp-proto-common==1.30.0
|
||||
opentelemetry-exporter-otlp-proto-grpc==1.30.0
|
||||
opentelemetry-instrumentation==0.51b0
|
||||
opentelemetry-instrumentation-asgi==0.51b0
|
||||
opentelemetry-instrumentation-fastapi==0.51b0
|
||||
opentelemetry-proto==1.30.0
|
||||
opentelemetry-sdk==1.30.0
|
||||
opentelemetry-semantic-conventions==0.51b0
|
||||
opentelemetry-util-http==0.51b0
|
||||
orjson==3.10.15
|
||||
overrides==7.7.0
|
||||
packaging==24.2
|
||||
pandas==2.2.3
|
||||
parso==0.8.4
|
||||
pathspec==0.12.1
|
||||
pexpect==4.9.0
|
||||
pillow==11.1.0
|
||||
posthog==3.11.0
|
||||
prompt_toolkit==3.0.50
|
||||
propcache==0.2.1
|
||||
protobuf==5.29.3
|
||||
ptyprocess==0.7.0
|
||||
pure_eval==0.2.3
|
||||
pyasn1==0.6.1
|
||||
pyasn1_modules==0.4.1
|
||||
pydantic==2.10.6
|
||||
pydantic_core==2.27.2
|
||||
Pygments==2.19.1
|
||||
pypdf==5.2.0
|
||||
PyPika==0.48.9
|
||||
pyproject_hooks==1.2.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
python-multipart==0.0.20
|
||||
pytz==2025.1
|
||||
PyYAML==6.0.2
|
||||
regex==2024.11.6
|
||||
requests==2.32.3
|
||||
requests-oauthlib==2.0.0
|
||||
rich==13.9.4
|
||||
rich-toolkit==0.13.2
|
||||
rsa==4.9
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
soupsieve==2.6
|
||||
SQLAlchemy==2.0.38
|
||||
stack-data==0.6.3
|
||||
starlette==0.45.3
|
||||
striprtf==0.0.26
|
||||
sympy==1.13.3
|
||||
tenacity==9.0.0
|
||||
tiktoken==0.8.0
|
||||
tokenizers==0.21.0
|
||||
tqdm==4.67.1
|
||||
traitlets==5.14.3
|
||||
tree-sitter==0.21.3
|
||||
tree-sitter-languages==1.10.2
|
||||
typer==0.15.1
|
||||
typing-inspect==0.9.0
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2025.1
|
||||
urllib3==2.3.0
|
||||
uvicorn==0.34.0
|
||||
uvloop==0.21.0
|
||||
watchdog==6.0.0
|
||||
watchfiles==1.0.4
|
||||
wcwidth==0.2.13
|
||||
websocket-client==1.8.0
|
||||
websockets==14.2
|
||||
wrapt==1.17.2
|
||||
yarl==1.18.3
|
||||
zipp==3.21.0
|
||||
0
py/rag-service/src/libs/__init__.py
Normal file
0
py/rag-service/src/libs/__init__.py
Normal file
14
py/rag-service/src/libs/configs.py
Normal file
14
py/rag-service/src/libs/configs.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Service configuration: data directories derived from the DATA_DIR env var."""

import os
from pathlib import Path

# Root directory for all persisted service data (overridable via DATA_DIR).
BASE_DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
CHROMA_PERSIST_DIR = BASE_DATA_DIR / "chroma_db"
LOG_DIR = BASE_DATA_DIR / "logs"
DB_FILE = BASE_DATA_DIR / "sqlite" / "indexing_history.db"

# Create every directory up front so later imports (logger, db) can rely on
# them existing. Order mirrors declaration order above.
for _directory in (BASE_DATA_DIR, LOG_DIR, DB_FILE.parent, CHROMA_PERSIST_DIR):
    _directory.mkdir(parents=True, exist_ok=True)
|
||||
60
py/rag-service/src/libs/db.py
Normal file
60
py/rag-service/src/libs/db.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import sqlite3
|
||||
from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
|
||||
from libs.configs import DB_FILE
|
||||
|
||||
# SQLite table schemas.
# - indexing_history: one row per indexing attempt/result, keyed in practice
#   by document_id (see IndexingHistoryService).
# - resources: registry of indexed sources.
# NOTE(review): idx_status (on indexing_history) is declared after the
# resources indexes — harmless to executescript, but consider grouping it
# with the other indexing_history indexes for readability.
CREATE_TABLES_SQL = """
CREATE TABLE IF NOT EXISTS indexing_history (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    uri TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    status TEXT NOT NULL,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
    error_message TEXT,
    document_id TEXT,
    metadata TEXT
);

CREATE INDEX IF NOT EXISTS idx_uri ON indexing_history(uri);
CREATE INDEX IF NOT EXISTS idx_document_id ON indexing_history(document_id);
CREATE INDEX IF NOT EXISTS idx_content_hash ON indexing_history(content_hash);

CREATE TABLE IF NOT EXISTS resources (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    uri TEXT NOT NULL UNIQUE,
    type TEXT NOT NULL, -- 'path' or 'https'
    status TEXT NOT NULL DEFAULT 'active', -- 'active' or 'inactive'
    indexing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'indexing', 'indexed', 'failed'
    indexing_status_message TEXT,
    indexing_started_at DATETIME,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    last_indexed_at DATETIME,
    last_error TEXT
);

CREATE INDEX IF NOT EXISTS idx_resources_name ON resources(name);
CREATE INDEX IF NOT EXISTS idx_resources_uri ON resources(uri);
CREATE INDEX IF NOT EXISTS idx_resources_status ON resources(status);
CREATE INDEX IF NOT EXISTS idx_status ON indexing_history(status);
"""
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_db_connection() -> Generator[sqlite3.Connection, None, None]:
|
||||
"""Get a database connection."""
|
||||
conn = sqlite3.connect(DB_FILE)
|
||||
conn.row_factory = sqlite3.Row
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create all tables and indexes if they do not already exist.

    Idempotent: the schema uses CREATE ... IF NOT EXISTS throughout.
    """
    with get_db_connection() as connection:
        connection.executescript(CREATE_TABLES_SQL)
        connection.commit()
|
||||
16
py/rag-service/src/libs/logger.py
Normal file
16
py/rag-service/src/libs/logger.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Shared application logger: INFO level, daily log file plus stderr."""

import logging
from datetime import datetime

from libs.configs import LOG_DIR

# One log file per calendar day (local time), e.g. rag_service_20250101.log.
# NOTE: the date is fixed at import time, so a long-running process keeps
# writing to the file named after its start day.
_log_file = LOG_DIR / f"rag_service_{datetime.now().astimezone().strftime('%Y%m%d')}.log"

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(_log_file),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
|
||||
66
py/rag-service/src/libs/utils.py
Normal file
66
py/rag-service/src/libs/utils.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from llama_index.core.schema import BaseNode
|
||||
|
||||
# Matches doc_ids of the form "<uri>__part_<n>" (produced when a document is
# split into parts); group "uri" recovers the base URI.
PATTERN_URI_PART = re.compile(r"(?P<uri>.+)__part_\d+")
# Metadata key under which a node stores its source URI.
METADATA_KEY_URI = "uri"
|
||||
|
||||
|
||||
def uri_to_path(uri: str) -> Path:
    """Convert a file:// URI to a local filesystem path.

    Only a leading "file://" scheme prefix is stripped. The previous
    implementation used str.replace, which also mangled any later
    "file://" occurrence inside the path itself.
    """
    return Path(uri.removeprefix("file://"))
|
||||
|
||||
|
||||
def path_to_uri(file_path: Path) -> str:
    """Convert a local path to a file:// URI.

    Directories get a trailing "/" so they are distinguishable from plain
    files downstream.
    """
    result = file_path.as_uri()
    return result + "/" if file_path.is_dir() else result
|
||||
|
||||
|
||||
def is_local_uri(uri: str) -> bool:
    """Return True when *uri* refers to the local filesystem (file:// scheme)."""
    return uri.startswith("file://")
|
||||
|
||||
|
||||
def is_remote_uri(uri: str) -> bool:
    """Return True when *uri* uses the http or https scheme."""
    return uri.startswith(("http://", "https://"))
|
||||
|
||||
|
||||
def is_path_node(node: BaseNode) -> bool:
    """Return True when the node's URI points at the local filesystem."""
    node_uri = get_node_uri(node)
    # A node without any resolvable URI is never treated as a file node.
    return bool(node_uri) and is_local_uri(node_uri)
|
||||
|
||||
|
||||
def get_node_uri(node: BaseNode) -> str | None:
    """Resolve a node's source URI.

    Prefers the explicit metadata entry; otherwise falls back to the node's
    doc_id, stripping a "__part_<n>" suffix when present. Bare absolute
    paths are normalized to file:// URIs.
    """
    uri = node.metadata.get(METADATA_KEY_URI)
    if not uri:
        doc_id = getattr(node, "doc_id", None)
        if doc_id:
            # Split documents carry doc_ids like "<uri>__part_<n>";
            # recover the base URI in that case.
            match = PATTERN_URI_PART.match(doc_id)
            uri = match.group("uri") if match else doc_id
    if not uri:
        return None
    return f"file://{uri}" if uri.startswith("/") else uri
|
||||
|
||||
|
||||
def inject_uri_to_node(node: BaseNode) -> None:
    """Store the node's resolved URI in its metadata, if not already set.

    No-op when the metadata key exists or no URI can be resolved.
    """
    if METADATA_KEY_URI in node.metadata:
        return
    resolved = get_node_uri(node)
    if resolved:
        node.metadata[METADATA_KEY_URI] = resolved
|
||||
1114
py/rag-service/src/main.py
Normal file
1114
py/rag-service/src/main.py
Normal file
File diff suppressed because it is too large
Load Diff
0
py/rag-service/src/models/__init__.py
Normal file
0
py/rag-service/src/models/__init__.py
Normal file
19
py/rag-service/src/models/indexing_history.py
Normal file
19
py/rag-service/src/models/indexing_history.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Indexing History Model."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class IndexingHistory(BaseModel):
    """Model for indexing history record.

    Mirrors one row of the ``indexing_history`` SQLite table (see
    libs/db.py). ``timestamp`` defaults to naive local time via
    ``datetime.now`` — NOTE(review): confirm this matches SQLite's
    CURRENT_TIMESTAMP (UTC) convention used by the table default.
    """

    id: int | None = Field(None, description="Record ID")
    uri: str = Field(..., description="URI of the indexed file")
    content_hash: str = Field(..., description="MD5 hash of the file content")
    status: str = Field(..., description="Indexing status (indexing/completed/failed)")
    timestamp: datetime = Field(default_factory=datetime.now, description="Record timestamp")
    error_message: str | None = Field(None, description="Error message if failed")
    document_id: str | None = Field(None, description="Document ID in the index")
    metadata: dict[str, Any] | None = Field(None, description="Additional metadata")
|
||||
25
py/rag-service/src/models/resource.py
Normal file
25
py/rag-service/src/models/resource.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Resource Model."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Resource(BaseModel):
    """Model for resource record.

    Mirrors one row of the ``resources`` SQLite table.
    NOTE(review): the DB schema comment describes ``type`` as 'path' or
    'https', while this model validates 'local'/'remote' — confirm which
    vocabulary is actually stored; the Field description below was fixed
    to match the Literal this model enforces.
    """

    id: int | None = Field(None, description="Resource ID")
    name: str = Field(..., description="Name of the resource")
    uri: str = Field(..., description="URI of the resource")
    type: Literal["local", "remote"] = Field(..., description="Type of resource (local/remote)")
    status: str = Field("active", description="Status of resource (active/inactive)")
    indexing_status: Literal["pending", "indexing", "indexed", "failed"] = Field(
        "pending",
        description="Indexing status (pending/indexing/indexed/failed)",
    )
    indexing_status_message: str | None = Field(None, description="Indexing status message")
    created_at: datetime = Field(default_factory=datetime.now, description="Creation timestamp")
    indexing_started_at: datetime | None = Field(None, description="Indexing start timestamp")
    last_indexed_at: datetime | None = Field(None, description="Last indexing timestamp")
    last_error: str | None = Field(None, description="Last error message if any")
|
||||
0
py/rag-service/src/services/__init__.py
Normal file
0
py/rag-service/src/services/__init__.py
Normal file
174
py/rag-service/src/services/indexing_history.py
Normal file
174
py/rag-service/src/services/indexing_history.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from libs.db import get_db_connection
|
||||
from libs.logger import logger
|
||||
from libs.utils import get_node_uri
|
||||
from llama_index.core.schema import Document
|
||||
from models.indexing_history import IndexingHistory
|
||||
|
||||
|
||||
class IndexingHistoryService:
    """Persistence helpers for the ``indexing_history`` SQLite table.

    Each row records one indexing attempt/result for a document (or one
    part of a split document) and is keyed in practice by ``document_id``.
    """

    def delete_indexing_status(self, uri: str) -> None:
        """Delete all indexing records for a specific file URI."""
        with get_db_connection() as conn:
            conn.execute(
                """
                DELETE FROM indexing_history
                WHERE uri = ?
                """,
                (uri,),
            )
            conn.commit()

    def delete_indexing_status_by_document_id(self, document_id: str) -> None:
        """Delete indexing records for a specific document."""
        with get_db_connection() as conn:
            conn.execute(
                """
                DELETE FROM indexing_history
                WHERE document_id = ?
                """,
                (document_id,),
            )
            conn.commit()

    def update_indexing_status(
        self,
        doc: Document,
        status: str,
        error_message: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Insert or update the indexing status row for *doc*.

        The row is keyed by ``document_id`` for both the existence check
        and the UPDATE. (Previously the UPDATE filtered on ``uri``, which
        clobbered the rows of every other part of a split document that
        shares the same base URI as this one.)
        """
        content_hash = doc.hash

        # Get URI from metadata (or reconstructed from doc_id); without a
        # URI the row cannot be recorded meaningfully.
        uri = get_node_uri(doc)
        if not uri:
            logger.warning("URI not found for document: %s", doc.doc_id)
            return

        record = IndexingHistory(
            id=None,
            uri=uri,
            content_hash=content_hash,
            status=status,
            error_message=error_message,
            document_id=doc.doc_id,
            metadata=metadata,
        )
        # Serialize once; used by both the UPDATE and INSERT branches.
        metadata_json = json.dumps(record.metadata) if record.metadata else None

        with get_db_connection() as conn:
            # Check whether a row for this document already exists.
            existing = conn.execute(
                "SELECT id FROM indexing_history WHERE document_id = ?",
                (doc.doc_id,),
            ).fetchone()

            if existing:
                # Update the existing record in place, keyed by document_id
                # to match the existence check above.
                conn.execute(
                    """
                    UPDATE indexing_history
                    SET uri = ?, content_hash = ?, status = ?, error_message = ?, metadata = ?
                    WHERE document_id = ?
                    """,
                    (
                        record.uri,
                        record.content_hash,
                        record.status,
                        record.error_message,
                        metadata_json,
                        record.document_id,
                    ),
                )
            else:
                # Insert a new record.
                conn.execute(
                    """
                    INSERT INTO indexing_history
                    (uri, content_hash, status, error_message, document_id, metadata)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (
                        record.uri,
                        record.content_hash,
                        record.status,
                        record.error_message,
                        record.document_id,
                        metadata_json,
                    ),
                )
            conn.commit()

    def get_indexing_status(self, doc: Document | None = None, base_uri: str | None = None) -> list[IndexingHistory]:
        """Get indexing status rows from the database.

        - With *doc*: the latest record matching the document's URI and
          content hash (at most one row).
        - With *base_uri*: the latest record per document under that URI
          prefix.
        - Otherwise: the latest record per URI.
        """
        with get_db_connection() as conn:
            if doc:
                uri = get_node_uri(doc)
                if not uri:
                    logger.warning("URI not found for document: %s", doc.doc_id)
                    return []
                content_hash = doc.hash
                # For a specific file, get its latest status.
                query = """
                SELECT *
                FROM indexing_history
                WHERE uri = ? and content_hash = ?
                ORDER BY timestamp DESC LIMIT 1
                """
                params: tuple = (uri, content_hash)
            elif base_uri:
                # For files under a URI prefix, get their latest status.
                # NOTE(review): this branch partitions by document_id while
                # the catch-all branch partitions by uri — confirm the
                # difference is intentional.
                query = """
                WITH RankedHistory AS (
                    SELECT *,
                           ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY timestamp DESC) as rn
                    FROM indexing_history
                    WHERE uri LIKE ? || '%'
                )
                SELECT id, uri, content_hash, status, timestamp, error_message, document_id, metadata
                FROM RankedHistory
                WHERE rn = 1
                ORDER BY timestamp DESC
                """
                # URIs always use "/" separators, so do not use the
                # platform-dependent os.path.sep here (the original did,
                # which broke prefix matching on Windows).
                params = (base_uri,) if base_uri.endswith("/") else (base_uri + "/",)
            else:
                # For all files, get their latest status per URI.
                query = """
                WITH RankedHistory AS (
                    SELECT *,
                           ROW_NUMBER() OVER (PARTITION BY uri ORDER BY timestamp DESC) as rn
                    FROM indexing_history
                )
                SELECT id, uri, content_hash, status, timestamp, error_message, document_id, metadata
                FROM RankedHistory
                WHERE rn = 1
                ORDER BY timestamp DESC
                """
                params = ()

            rows = conn.execute(query, params).fetchall()

        result = []
        for row in rows:
            row_dict = dict(row)
            # Metadata is stored as a JSON string; tolerate corrupt values.
            if row_dict.get("metadata"):
                try:
                    row_dict["metadata"] = json.loads(row_dict["metadata"])
                except json.JSONDecodeError:
                    row_dict["metadata"] = None
            # SQLite returns timestamps as ISO strings; normalize a trailing
            # "Z" so fromisoformat accepts them.
            if isinstance(row_dict.get("timestamp"), str):
                row_dict["timestamp"] = datetime.fromisoformat(
                    row_dict["timestamp"].replace("Z", "+00:00"),
                )
            result.append(IndexingHistory(**row_dict))

        return result
|
||||
|
||||
|
||||
# Module-level singleton shared by the rest of the service.
indexing_history_service = IndexingHistoryService()
|
||||
104
py/rag-service/src/services/resource.py
Normal file
104
py/rag-service/src/services/resource.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Resource Service."""
|
||||
|
||||
from libs.db import get_db_connection
|
||||
from models.resource import Resource
|
||||
|
||||
|
||||
class ResourceService:
    """Resource Service: persistence helpers for the ``resources`` table."""

    def add_resource_to_db(self, resource: Resource) -> None:
        """Add a resource to the database."""
        values = (
            resource.name,
            resource.uri,
            resource.type,
            resource.status,
            resource.indexing_status,
            resource.created_at,
        )
        with get_db_connection() as db:
            db.execute(
                """
                INSERT INTO resources (name, uri, type, status, indexing_status, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                values,
            )
            db.commit()

    def update_resource_indexing_status(self, uri: str, indexing_status: str, indexing_status_message: str) -> None:
        """Update resource indexing status in the database.

        Entering the "indexing" state stamps indexing_started_at; every
        other state stamps last_indexed_at instead.
        """
        if indexing_status == "indexing":
            sql = """
                UPDATE resources
                SET indexing_status = ?, indexing_status_message = ?, indexing_started_at = CURRENT_TIMESTAMP
                WHERE uri = ?
                """
        else:
            sql = """
                UPDATE resources
                SET indexing_status = ?, indexing_status_message = ?, last_indexed_at = CURRENT_TIMESTAMP
                WHERE uri = ?
                """
        with get_db_connection() as db:
            db.execute(sql, (indexing_status, indexing_status_message, uri))
            db.commit()

    def update_resource_status(self, uri: str, status: str, error: str | None = None) -> None:
        """Update resource status in the database.

        Marking a resource "active" also refreshes last_indexed_at.
        """
        if status == "active":
            sql = """
                UPDATE resources
                SET status = ?, last_indexed_at = CURRENT_TIMESTAMP, last_error = ?
                WHERE uri = ?
                """
        else:
            sql = """
                UPDATE resources
                SET status = ?, last_error = ?
                WHERE uri = ?
                """
        with get_db_connection() as db:
            db.execute(sql, (status, error, uri))
            db.commit()

    def get_resource(self, uri: str) -> Resource | None:
        """Get a resource by URI, or None when absent."""
        with get_db_connection() as db:
            record = db.execute(
                "SELECT * FROM resources WHERE uri = ?",
                (uri,),
            ).fetchone()
        return Resource(**dict(record)) if record else None

    def get_resource_by_name(self, name: str) -> Resource | None:
        """Get a resource by name, or None when absent."""
        with get_db_connection() as db:
            record = db.execute(
                "SELECT * FROM resources WHERE name = ?",
                (name,),
            ).fetchone()
        return Resource(**dict(record)) if record else None

    def get_all_resources(self) -> list[Resource]:
        """Get all resources, newest first."""
        with get_db_connection() as db:
            records = db.execute("SELECT * FROM resources ORDER BY created_at DESC").fetchall()
        return [Resource(**dict(record)) for record in records]
|
||||
|
||||
|
||||
# Module-level singleton shared by the rest of the service.
resource_service = ResourceService()
|
||||
Reference in New Issue
Block a user