Added a lock to prevent two inferences from running at the same time, and added corresponding support for asynchronous-generator-based models
This commit is contained in:
parent
c6d779f591
commit
775c78c6cb
4 changed files with 19 additions and 12 deletions
|
@@ -106,20 +106,21 @@ class BaseModel(abc.ABC):

         Do not call manually, use `unload` instead.
         """

-    def infer(self, **kwargs) -> typing.Iterator[bytes]:
+    async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model within the model manager
         :return: the response of the model
         """

-        # make sure we are loaded before an inference
-        self.load()
+        async with self.manager.inference_lock:
+            # make sure we are loaded before an inference
+            self.load()

-        # model specific inference part
-        return self._infer(**kwargs)
+            # model specific inference part
+            return self._infer(**kwargs)

     @abc.abstractmethod
-    def _infer(self, **kwargs) -> typing.Iterator[bytes]:
+    def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model
         :return: the response of the model
|
Loading…
Add table
Add a link
Reference in a new issue