diff --git a/samples/models/dummy/config.json b/samples/models/dummy/config.json
index c12d549..09be48c 100644
--- a/samples/models/dummy/config.json
+++ b/samples/models/dummy/config.json
@@ -3,8 +3,6 @@
   "tags": ["dummy"],
   "file": "model.py",
 
-  "output_type": "video/mp4",
-
   "inputs": {
     "file": {"type": "file"}
   }
diff --git a/samples/models/dummy/model.py b/samples/models/dummy/model.py
index 43be902..1586d61 100644
--- a/samples/models/dummy/model.py
+++ b/samples/models/dummy/model.py
@@ -1,3 +1,4 @@
+import json
 import typing
 
 
@@ -7,5 +8,5 @@ def load(model) -> None:
 def unload(model) -> None:
     pass
 
-async def infer(model, file) -> typing.AsyncIterator[bytes]:
-    yield await file.read()
\ No newline at end of file
+def infer(model, file) -> typing.Iterator[bytes]:
+    yield json.dumps({"hello": "world!"}).encode("utf-8")
diff --git a/source/manager/ModelManager.py b/source/manager/ModelManager.py
index 6313ab9..2c7106c 100644
--- a/source/manager/ModelManager.py
+++ b/source/manager/ModelManager.py
@@ -1,4 +1,3 @@
-import asyncio
 import json
 import os
 import typing
@@ -11,11 +10,6 @@ from source import model, api
 
 
 class ModelManager:
-    """
-    The model manager
-    Load the list of models available, ensure that only one model is loaded at the same time.
-    """
-
     def __init__(self, application: api.Application, model_library: os.PathLike | str):
         self.application: api.Application = application
         self.model_library: Path = Path(model_library)
@@ -26,14 +20,9 @@ class ModelManager:
         self.models: dict[str, model.base.BaseModel] = {}
 
         # the currently loaded model
-        # TODO(Faraphel): load more than one model at a time ?
-        #  would require a way more complex manager to handle memory issue
-        #  having two calculations at the same time might not be worth it either
+        # TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue
         self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
 
-        # lock to avoid concurrent inference and concurrent model loading and unloading
-        self.inference_lock = asyncio.Lock()
-
         @self.application.get("/models")
         async def get_models() -> list[str]:
             """
diff --git a/source/model/PythonModel.py b/source/model/PythonModel.py
index c23bceb..0d47fc6 100644
--- a/source/model/PythonModel.py
+++ b/source/model/PythonModel.py
@@ -50,7 +50,7 @@ class PythonModel(base.BaseModel):
         parameters = utils.parameters.load(configuration.get("inputs", {}))
 
         # create an endpoint wrapping the inference inside a fastapi call
-        async def infer_api(**kwargs) -> fastapi.responses.StreamingResponse:
+        async def infer_api(**kwargs):
             # NOTE: fix an issue where it is not possible to give an UploadFile to a StreamingResponse
             # NOTE: perform a naive type(value).__name__ == "type_name" because fastapi do not use it own
             #  fastapi.UploadFile class, but instead the starlette UploadFile class that is more of an implementation
@@ -61,12 +61,8 @@ class PythonModel(base.BaseModel):
             }
 
             return fastapi.responses.StreamingResponse(
-                content=await self.infer(**kwargs),
+                content=self.infer(**kwargs),
                 media_type=self.output_type,
-                headers={
-                    # if the data is not text-like, mark it as an attachment to avoid display issue with Swagger UI
-                    "content-disposition": "inline" if utils.mimetypes.is_textlike(self.output_type) else "attachment"
-                }
             )
 
         infer_api.__signature__ = inspect.Signature(parameters=parameters)
@@ -77,12 +73,6 @@ class PythonModel(base.BaseModel):
             infer_api,
             methods=["POST"],
             tags=self.tags,
-            # summary=...,
-            # description=...,
-            response_class=fastapi.responses.StreamingResponse,
-            responses={
-                200: {"content": {self.output_type: {}}}
-            },
         )
 
     def _load(self) -> None:
@@ -91,5 +81,5 @@ def _load(self) -> None:
     def _unload(self) -> None:
         return self.module.unload(self)
 
-    def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.Iterator[bytes]:
+    def _infer(self, **kwargs) -> typing.Iterator[bytes]:
         return self.module.infer(self, **kwargs)
diff --git a/source/model/base/BaseModel.py b/source/model/base/BaseModel.py
index 9072e97..d39d7d2 100644
--- a/source/model/base/BaseModel.py
+++ b/source/model/base/BaseModel.py
@@ -106,21 +106,20 @@ class BaseModel(abc.ABC):
         Do not call manually, use `unload` instead.
         """
 
-    async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
+    def infer(self, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model within the model manager
         :return: the response of the model
         """
 
-        async with self.manager.inference_lock:
-            # make sure we are loaded before an inference
-            self.load()
+        # make sure we are loaded before an inference
+        self.load()
 
-            # model specific inference part
-            return self._infer(**kwargs)
+        # model specific inference part
+        return self._infer(**kwargs)
 
     @abc.abstractmethod
-    def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
+    def _infer(self, **kwargs) -> typing.Iterator[bytes]:
         """
         Infer our payload through the model
         :return: the response of the model
diff --git a/source/utils/__init__.py b/source/utils/__init__.py
index 1b48283..f6bd50e 100644
--- a/source/utils/__init__.py
+++ b/source/utils/__init__.py
@@ -1,2 +1 @@
 from . import parameters
-from . import mimetypes
diff --git a/source/utils/mimetypes.py b/source/utils/mimetypes.py
deleted file mode 100644
index 2b54dd8..0000000
--- a/source/utils/mimetypes.py
+++ /dev/null
@@ -1,21 +0,0 @@
-def is_textlike(mimetype: str) -> bool:
-    """
-    Determinate if a mimetype is considered as holding text
-    :param mimetype: the mimetype to check
-    :return: True if the mimetype represent text, False otherwise
-    """
-
-    # check the family of the mimetype
-    if mimetype.startswith("text/"):
-        return True
-
-    # check applications formats that are text formatted
-    if mimetype in [
-        "application/xml",
-        "application/json",
-        "application/javascript"
-    ]:
-        return True
-
-    # otherwise consider the file as non-text
-    return False
\ No newline at end of file
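Note: as a quick sanity check of the new synchronous inference contract, this is what samples/models/dummy/model.py ends up looking like once the patch is applied. It is reconstructed from the hunks above; the body of load() is not visible in the diff and is assumed here to be a no-op.

import json
import typing


def load(model) -> None:
    # assumed no-op: the diff only shows this signature as hunk context
    pass


def unload(model) -> None:
    pass


def infer(model, file) -> typing.Iterator[bytes]:
    # inference hooks are now plain synchronous generators yielding bytes;
    # PythonModel passes this iterator straight to a fastapi StreamingResponse
    yield json.dumps({"hello": "world!"}).encode("utf-8")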