added a lock to avoid two inferences running at the same time and added corresponding support for asynchronous-generator-based models

faraphel 2025-01-10 19:11:48 +01:00
parent c6d779f591
commit 775c78c6cb
4 changed files with 19 additions and 12 deletions
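
The change below takes `self.manager.inference_lock` before loading and running the model. The manager side is not part of this diff; a minimal sketch of what it might look like, assuming `inference_lock` is simply an `asyncio.Lock` owned by the model manager (the `ModelManager` class shown here is illustrative, not taken from the repository):

import asyncio


class ModelManager:
    """Illustrative manager: owns the lock that serialises inferences."""

    def __init__(self) -> None:
        # Assumption: a single asyncio.Lock shared by every model, so only
        # one inference runs at a time across the whole manager.
        self.inference_lock = asyncio.Lock()

Because an asyncio.Lock is awaited on entry, a second request issued while an inference is in progress is queued rather than run concurrently.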

@@ -106,20 +106,21 @@ class BaseModel(abc.ABC):
         Do not call manually, use `unload` instead.
         """
 
-    def infer(self, **kwargs) -> typing.Iterator[bytes]:
+    async def infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model within the model manager
         :return: the response of the model
         """
 
-        # make sure we are loaded before an inference
-        self.load()
+        async with self.manager.inference_lock:
+            # make sure we are loaded before an inference
+            self.load()
 
-        # model specific inference part
-        return self._infer(**kwargs)
+            # model specific inference part
+            return self._infer(**kwargs)
 
     @abc.abstractmethod
-    def _infer(self, **kwargs) -> typing.Iterator[bytes]:
+    def _infer(self, **kwargs) -> typing.Iterator[bytes] | typing.AsyncIterator[bytes]:
         """
         Infer our payload through the model
         :return: the response of the model
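
With this change `infer` is a coroutine that may hand back either a plain iterator or an async iterator, so callers have to handle both shapes. A sketch of one way a caller could normalise the result into a single async stream (the `stream_response` helper and its `model` argument are illustrative, not part of this commit):

import collections.abc
import typing


async def stream_response(model, **kwargs) -> typing.AsyncIterator[bytes]:
    # `infer` is now async, so it must be awaited to obtain the iterator.
    result = await model.infer(**kwargs)

    if isinstance(result, collections.abc.AsyncIterator):
        # asynchronous-generator-based model: forward chunks as they arrive
        async for chunk in result:
            yield chunk
    else:
        # classic synchronous generator: iterate it directly
        for chunk in result:
            yield chunk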