feat: add support for ollama RAG providers (#1427)

* fix: openai env

* feat: add support for multiple RAG providers

- Added provider, model and endpoint configuration options for RAG service

- Updated RAG service to support both OpenAI and Ollama providers

- Added Ollama embedding support and dependencies

- Improved environment variable handling for RAG service configuration (a configuration sketch follows below)
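
A minimal configuration sketch for the new options, based on the environment variables this change reads (RAG_PROVIDER, RAG_EMBED_MODEL, RAG_LLM_MODEL, and the <PROVIDER>_API_BASE endpoint); the values shown are the Ollama fallbacks used in the diff below, not required settings:

# RAG provider selection; omit these to fall back to the defaults in the code
RAG_PROVIDER=ollama
OLLAMA_API_BASE=http://localhost:11434
RAG_EMBED_MODEL=nomic-embed-text
RAG_LLM_MODEL=llama3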

Signed-off-by: wfhtqp@gmail.com <wfhtqp@gmail.com>

* fix: update docker env

* feat: rag server add ollama llm

* fix: pre-commit

* feat: check embed model and clean

* docs: add rag server config docs

* fix: pyright ignore

---------

Signed-off-by: wfhtqp@gmail.com <wfhtqp@gmail.com>
nzlov authored 2025-03-04 11:07:40 +08:00 · committed by GitHub
parent b01121bc39
commit de7cccd089
5 changed files with 89 additions and 18 deletions


@@ -44,7 +44,10 @@ from llama_index.core import (
 )
 from llama_index.core.node_parser import CodeSplitter
 from llama_index.core.schema import Document
-from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.embeddings.ollama import OllamaEmbedding
+from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType
+from llama_index.llms.ollama import Ollama
+from llama_index.llms.openai import OpenAI
 from llama_index.vector_stores.chroma import ChromaVectorStore
 from markdownify import markdownify as md
 from models.indexing_history import IndexingHistory  # noqa: TC002
@@ -311,14 +314,57 @@ init_db()
 # Initialize ChromaDB and LlamaIndex services
 chroma_client = chromadb.PersistentClient(path=str(CHROMA_PERSIST_DIR))
-chroma_collection = chroma_client.get_or_create_collection("documents")
+
+# Check if provider or model has changed
+current_provider = os.getenv("RAG_PROVIDER", "openai").lower()
+current_embed_model = os.getenv("RAG_EMBED_MODEL", "")
+current_llm_model = os.getenv("RAG_LLM_MODEL", "")
+
+# Try to read previous config
+config_file = BASE_DATA_DIR / "rag_config.json"
+if config_file.exists():
+    with Path.open(config_file, "r") as f:
+        prev_config = json.load(f)
+    if prev_config.get("provider") != current_provider or prev_config.get("embed_model") != current_embed_model:
+        # Clear existing data if config changed
+        logger.info("Detected config change, clearing existing data...")
+        chroma_client.reset()
+
+# Save current config
+with Path.open(config_file, "w") as f:
+    json.dump({"provider": current_provider, "embed_model": current_embed_model}, f)
+
+chroma_collection = chroma_client.get_or_create_collection("documents")  # pyright: ignore
 vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
 storage_context = StorageContext.from_defaults(vector_store=vector_store)
-embed_model = OpenAIEmbedding()
-model = os.getenv("OPENAI_EMBED_MODEL", "")
-if model:
-    embed_model = OpenAIEmbedding(model=model)
+
+# Initialize embedding model based on provider
+llm_provider = current_provider
+base_url = os.getenv(llm_provider.upper() + "_API_BASE", "")
+rag_embed_model = current_embed_model
+rag_llm_model = current_llm_model
+if llm_provider == "ollama":
+    if base_url == "":
+        base_url = "http://localhost:11434"
+    if rag_embed_model == "":
+        rag_embed_model = "nomic-embed-text"
+    if rag_llm_model == "":
+        rag_llm_model = "llama3"
+    embed_model = OllamaEmbedding(model_name=rag_embed_model, base_url=base_url)
+    llm_model = Ollama(model=rag_llm_model, base_url=base_url, request_timeout=60.0)
+else:
+    if base_url == "":
+        base_url = "https://api.openai.com/v1"
+    if rag_embed_model == "":
+        rag_embed_model = OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002
+    if rag_llm_model == "":
+        rag_llm_model = "gpt-3.5-turbo"
+    embed_model = OpenAIEmbedding(model=rag_embed_model, api_base=base_url)
+    llm_model = OpenAI(model=rag_llm_model, api_base=base_url)
+
 Settings.embed_model = embed_model
+Settings.llm = llm_model
 try:
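
For context, a quick way to exercise the Ollama wiring this diff introduces (a minimal sketch, not part of the commit; it assumes a local Ollama server with the default models pulled, using the same llama-index classes the server now imports):

# Standalone sanity check for the Ollama embedding and LLM endpoints.
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

embed_model = OllamaEmbedding(model_name="nomic-embed-text", base_url="http://localhost:11434")
llm_model = Ollama(model="llama3", base_url="http://localhost:11434", request_timeout=60.0)

# Embed a short string and ask the LLM a trivial question to confirm both endpoints respond.
vector = embed_model.get_text_embedding("hello world")
print(len(vector))  # embedding dimensionality, e.g. 768 for nomic-embed-text
print(llm_model.complete("Say hi in one word."))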