Added a lock to prevent two inferences from running at the same time, and added corresponding support for asynchronous-generator-based models

This commit is contained in:
faraphel 2025-01-10 19:11:48 +01:00
parent c6d779f591
commit 775c78c6cb
4 changed files with 19 additions and 12 deletions

View file

@ -23,6 +23,9 @@ class ModelManager:
# TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
# lock to avoid concurrent inference and concurrent model loading and unloading
self.inference_lock = asyncio.Lock()
@self.application.get("/models")
async def get_models() -> list[str]:
"""