Compare commits

...

2 commits

7 changed files with 58 additions and 13 deletions

View file

@ -3,6 +3,8 @@
"tags": ["dummy"], "tags": ["dummy"],
"file": "model.py", "file": "model.py",
"output_type": "video/mp4",
"inputs": { "inputs": {
"file": {"type": "file"} "file": {"type": "file"}
} }

View file

@ -1,4 +1,3 @@
import json
import typing import typing
@ -8,5 +7,5 @@ def load(model) -> None:
def unload(model) -> None: def unload(model) -> None:
pass pass
def infer(model, file) -> typing.Iterator[bytes]: async def infer(model, file) -> typing.AsyncIterator[bytes]:
yield json.dumps({"hello": "world!"}).encode("utf-8") yield await file.read()

View file

@ -1,3 +1,4 @@
import asyncio
import json import json
import os import os
import typing import typing
@ -10,6 +11,11 @@ from source import model, api
class ModelManager: class ModelManager:
"""
The model manager
Load the list of models available, ensure that only one model is loaded at the same time.
"""
def __init__(self, application: api.Application, model_library: os.PathLike | str): def __init__(self, application: api.Application, model_library: os.PathLike | str):
self.application: api.Application = application self.application: api.Application = application
self.model_library: Path = Path(model_library) self.model_library: Path = Path(model_library)
@ -20,9 +26,14 @@ class ModelManager:
self.models: dict[str, model.base.BaseModel] = {} self.models: dict[str, model.base.BaseModel] = {}
# the currently loaded model # the currently loaded model
# TODO(Faraphel): load more than one model at a time ? require a way more complex manager to handle memory issue # TODO(Faraphel): load more than one model at a time ?
# would require a way more complex manager to handle memory issue
# having two calculations at the same time might not be worth it either
self.current_loaded_model: typing.Optional[model.base.BaseModel] = None self.current_loaded_model: typing.Optional[model.base.BaseModel] = None
# lock to avoid concurrent inference and concurrent model loading and unloading
self.inference_lock = asyncio.Lock()
@self.application.get("/models") @self.application.get("/models")
async def get_models() -> list[str]: async def get_models() -> list[str]:
""" """

View file

@ -50,7 +50,7 @@ class PythonModel(base.BaseModel):
parameters = utils.parameters.load(configuration.get("inputs", {})) parameters = utils.parameters.load(configuration.get("inputs", {}))
# create an endpoint wrapping the inference inside a fastapi call # create an endpoint wrapping the inference inside a fastapi call
async def infer_api(**kwargs): async def infer_api(**kwargs) -> fastapi.responses.StreamingResponse:
# NOTE: fix an issue where it is not possible to give an UploadFile to a StreamingResponse # NOTE: fix an issue where it is not possible to give an UploadFile to a StreamingResponse
# NOTE: perform a naive type(value).__name__ == "type_name" because fastapi do not use it own # NOTE: perform a naive type(value).__name__ == "type_name" because fastapi do not use it own
# fastapi.UploadFile class, but instead the starlette UploadFile class that is more of an implementation # fastapi.UploadFile class, but instead the starlette UploadFile class that is more of an implementation
@ -61,8 +61,12 @@ class PythonModel(base.BaseModel):
} }
return fastapi.responses.StreamingResponse( return fastapi.responses.StreamingResponse(
content=self.infer(**kwargs), content=await self.infer(**kwargs),
media_type=self.output_type, media_type=self.output_type,
headers={
# if the data is not text-like, mark it as an attachment to avoid display issue with Swagger UI
"content-disposition": "inline" if utils.mimetypes.is_textlike(self.output_type) else "attachment"
}
) )
infer_api.__signature__ = inspect.Signature(parameters=parameters) infer_api.__signature__ = inspect.Signature(parameters=parameters)
@ -73,6 +77,12 @@ class PythonModel(base.BaseModel):
infer_api, infer_api,
methods=["POST"], methods=["POST"],
tags=self.tags, tags=self.tags,
# summary=...,
# description=...,
response_class=fastapi.responses.StreamingResponse,
responses={
200: {"content": {self.output_type: {}}}
},
) )
def _load(self) -> None: def _load(self) -> None:
@ -81,5 +91,5 @@ class PythonModel(base.BaseModel):
def _unload(self) -> None: def _unload(self) -> None:
return self.module.unload(self) return self.module.unload(self)
def _infer(self, **kwargs) -> typing.Iterator[bytes]: def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
return self.module.infer(self, **kwargs) return self.module.infer(self, **kwargs)

View file

@ -106,12 +106,13 @@ class BaseModel(abc.ABC):
Do not call manually, use `unload` instead. Do not call manually, use `unload` instead.
""" """
def infer(self, **kwargs) -> typing.Iterator[bytes]: async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
""" """
Infer our payload through the model within the model manager Infer our payload through the model within the model manager
:return: the response of the model :return: the response of the model
""" """
async with self.manager.inference_lock:
# make sure we are loaded before an inference # make sure we are loaded before an inference
self.load() self.load()
@ -119,7 +120,7 @@ class BaseModel(abc.ABC):
return self._infer(**kwargs) return self._infer(**kwargs)
@abc.abstractmethod @abc.abstractmethod
def _infer(self, **kwargs) -> typing.Iterator[bytes]: def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
""" """
Infer our payload through the model Infer our payload through the model
:return: the response of the model :return: the response of the model

View file

@ -1 +1,2 @@
from . import parameters from . import parameters
from . import mimetypes

21
source/utils/mimetypes.py Normal file
View file

@ -0,0 +1,21 @@
def is_textlike(mimetype: str) -> bool:
    """
    Determine if a mimetype is considered as holding text
    :param mimetype: the mimetype to check (parameters such as "; charset=utf-8" are ignored)
    :return: True if the mimetype represents text, False otherwise
    """
    # drop any media type parameters (e.g. "application/json; charset=utf-8")
    # and normalise the case, since mimetypes are case-insensitive
    base_type = mimetype.split(";", 1)[0].strip().lower()

    # check the family of the mimetype
    if base_type.startswith("text/"):
        return True

    # check applications formats that are text formatted
    if base_type in (
        "application/xml",
        "application/json",
        "application/javascript",
    ):
        return True

    # structured syntax suffixes (RFC 6839) such as "application/ld+json"
    # or "image/svg+xml" are text formatted as well
    if base_type.endswith(("+xml", "+json")):
        return True

    # otherwise consider the file as non-text
    return False