Added a lock to avoid two inferences running at the same time, and added corresponding support for asynchronous-generator-based models
This commit is contained in:
parent
c6d779f591
commit
775c78c6cb
4 changed files with 19 additions and 12 deletions
|
@ -23,6 +23,9 @@ class ModelManager:
|
|||
# TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
|
||||
self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
|
||||
|
||||
# lock to avoid concurrent inference and concurrent model loading and unloading
|
||||
self.inference_lock = asyncio.Lock()
|
||||
|
||||
@self.application.get("/models")
|
||||
async def get_models() -> list[str]:
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue