Added a lock to prevent two inferences from running at the same time, and consequently added support for asynchronous-generator-based models
This commit is contained in:
parent
c6d779f591
commit
775c78c6cb
4 changed files with 19 additions and 12 deletions
|
@ -50,7 +50,7 @@ class PythonModel(base.BaseModel):
|
|||
parameters = utils.parameters.load(configuration.get("inputs", {}))
|
||||
|
||||
# create an endpoint wrapping the inference inside a fastapi call
|
||||
async def infer_api(**kwargs):
|
||||
async def infer_api(**kwargs) -> fastapi.responses.StreamingResponse:
|
||||
# NOTE: fix an issue where it is not possible to give an UploadFile to a StreamingResponse
|
||||
# NOTE: perform a naive type(value).__name__ == "type_name" because fastapi does not use its own
|
||||
# fastapi.UploadFile class, but instead the starlette UploadFile class that is more of an implementation
|
||||
|
@ -61,8 +61,12 @@ class PythonModel(base.BaseModel):
|
|||
}
|
||||
|
||||
return fastapi.responses.StreamingResponse(
|
||||
content=self.infer(**kwargs),
|
||||
content=await self.infer(**kwargs),
|
||||
media_type=self.output_type,
|
||||
headers={
|
||||
# if the data is not text-like, mark it as an attachment to avoid display issue with Swagger UI
|
||||
"content-disposition": "inline" if utils.mimetypes.is_textlike(self.output_type) else "attachment"
|
||||
}
|
||||
)
|
||||
|
||||
infer_api.__signature__ = inspect.Signature(parameters=parameters)
|
||||
|
@ -81,5 +85,5 @@ class PythonModel(base.BaseModel):
|
|||
def _unload(self) -> None:
|
||||
return self.module.unload(self)
|
||||
|
||||
def _infer(self, **kwargs) -> typing.Iterator[bytes]:
|
||||
def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.Iterator[bytes]:
|
||||
return self.module.infer(self, **kwargs)
|
||||
|
|
|
@ -106,20 +106,21 @@ class BaseModel(abc.ABC):
|
|||
Do not call manually, use `unload` instead.
|
||||
"""
|
||||
|
||||
def infer(self, **kwargs) -> typing.Iterator[bytes]:
|
||||
async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
|
||||
"""
|
||||
Infer our payload through the model within the model manager
|
||||
:return: the response of the model
|
||||
"""
|
||||
|
||||
# make sure we are loaded before an inference
|
||||
self.load()
|
||||
async with self.manager.inference_lock:
|
||||
# make sure we are loaded before an inference
|
||||
self.load()
|
||||
|
||||
# model specific inference part
|
||||
return self._infer(**kwargs)
|
||||
# model specific inference part
|
||||
return self._infer(**kwargs)
|
||||
|
||||
@abc.abstractmethod
|
||||
def _infer(self, **kwargs) -> typing.Iterator[bytes]:
|
||||
def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
|
||||
"""
|
||||
Infer our payload through the model
|
||||
:return: the response of the model
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue