added a lock to avoid two inferences running at the same time and added corresponding support for asynchronous-generator-based models

faraphel 2025-01-10 19:11:48 +01:00
parent c6d779f591
commit 775c78c6cb
4 changed files with 19 additions and 12 deletions
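
The change below takes `self.manager.inference_lock` before loading and running the model. The manager side is not part of this diff; a minimal sketch of what it might look like, assuming `inference_lock` is simply an `asyncio.Lock` owned by the model manager (the `ModelManager` class shown here is illustrative, not taken from the repository):

import asyncio


class ModelManager:
    """Illustrative manager: owns the lock that serialises inferences."""

    def __init__(self) -> None:
        # Assumption: a single asyncio.Lock shared by every model, so only
        # one inference runs at a time across the whole manager.
        self.inference_lock = asyncio.Lock()

Because an asyncio.Lock is awaited on entry, a second request issued while an inference is in progress is queued rather than run concurrently.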

@@ -106,20 +106,21 @@ class BaseModel(abc.ABC):
         Do not call manually, use `unload` instead.
         """
 
-    def infer(self, **kwargs) -> typing.Iterator[bytes]:
+    async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model within the model manager
         :return: the response of the model
         """
 
-        # make sure we are loaded before an inference
-        self.load()
+        async with self.manager.inference_lock:
+            # make sure we are loaded before an inference
+            self.load()
 
-        # model specific inference part
-        return self._infer(**kwargs)
+            # model specific inference part
+            return self._infer(**kwargs)
 
     @abc.abstractmethod
-    def _infer(self, **kwargs) -> typing.Iterator[bytes]:
+    def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model
         :return: the response of the model
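
With this change `infer` is a coroutine that may hand back either a plain iterator or an async iterator, so callers have to handle both shapes. A sketch of one way a caller could normalise the result into a single async stream (the `stream_response` helper and its `model` argument are illustrative, not part of this commit):

import collections.abc
import typing


async def stream_response(model, **kwargs) -> typing.AsyncIterator[bytes]:
    # `infer` is now async, so it must be awaited to obtain the iterator.
    result = await model.infer(**kwargs)

    if isinstance(result, collections.abc.AsyncIterator):
        # asynchronous-generator-based model: forward chunks as they arrive
        async for chunk in result:
            yield chunk
    else:
        # classic synchronous generator: iterate it directly
        for chunk in result:
            yield chunk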