Added a lock to prevent two inferences from running at the same time, and added corresponding support for asynchronous-generator-based models

This commit is contained in:
faraphel 2025-01-10 19:11:48 +01:00
parent c6d779f591
commit 775c78c6cb
4 changed files with 19 additions and 12 deletions

View file

@ -23,6 +23,9 @@ class ModelManager:
# TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
# lock to avoid concurrent inference and concurrent model loading and unloading
self.inference_lock = asyncio.Lock()
@self.application.get("/models")
async def get_models() -> list[str]:
"""