Преглед на файлове

improve: generalize transformations and scripts of runner and preloads into TemplateTransformer (#4487)

Bowen Liang преди 11 месеца
родител
ревизия
5f4df34829

+ 3 - 2
api/core/helper/code_executor/code_executor.py

@@ -12,7 +12,7 @@ from config import get_env
 from core.helper.code_executor.entities import CodeDependency
 from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
 from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
-from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES, Python3TemplateTransformer
+from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
 from core.helper.code_executor.template_transformer import TemplateTransformer
 
 logger = logging.getLogger(__name__)
@@ -187,7 +187,8 @@ class CodeExecutor:
             response = response.json()
             dependencies = response.get('data', {}).get('dependencies', [])
             return [
-                CodeDependency(**dependency) for dependency in dependencies if dependency.get('name') not in PYTHON_STANDARD_PACKAGES
+                CodeDependency(**dependency) for dependency in dependencies
+                if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages()
             ]
         except Exception as e:
             logger.exception(f'Failed to list dependencies: {e}')

+ 19 - 52
api/core/helper/code_executor/javascript/javascript_transformer.py

@@ -1,58 +1,25 @@
-import json
-import re
-from typing import Optional
+from textwrap import dedent
 
-from core.helper.code_executor.entities import CodeDependency
 from core.helper.code_executor.template_transformer import TemplateTransformer
 
-NODEJS_RUNNER = """// declare main function here
-{{code}}
-
-// execute main function, and return the result
-// inputs is a dict, unstructured inputs
-output = main({{inputs}})
-
-// convert output to json and print
-output = JSON.stringify(output)
-
-result = `<<RESULT>>${output}<<RESULT>>`
-
-console.log(result)
-"""
-
-NODEJS_PRELOAD = """"""
-
 
 class NodeJsTemplateTransformer(TemplateTransformer):
     @classmethod
-    def transform_caller(cls, code: str, inputs: dict, 
-                         dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
-        """
-        Transform code to python runner
-        :param code: code
-        :param inputs: inputs
-        :return:
-        """
-
-        # transform inputs to json string
-        inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False)
-
-        # replace code and inputs
-        runner = NODEJS_RUNNER.replace('{{code}}', code)
-        runner = runner.replace('{{inputs}}', inputs_str)
-
-        return runner, NODEJS_PRELOAD, []
-
-    @classmethod
-    def transform_response(cls, response: str) -> dict:
-        """
-        Transform response to dict
-        :param response: response
-        :return:
-        """
-        # extract result
-        result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
-        if not result:
-            raise ValueError('Failed to parse result')
-        result = result.group(1)
-        return json.loads(result)
+    def get_runner_script(cls) -> str:
+        """
+        Get the Node.js runner script template
+
+        The inputs are decoded with Buffer as UTF-8, because atob() returns a
+        Latin-1 binary string and would garble non-ASCII characters in the
+        UTF-8 encoded JSON inputs.
+        :return: script containing the code and inputs placeholders; it prints the
+                 JSON-encoded return value of main between two <<RESULT>> tags
+        """
+        runner_script = dedent(
+            f"""
+            // declare main function
+            {cls._code_placeholder}
+            
+            // decode and prepare input object
+            var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))
+            
+            // execute main function
+            var output_obj = main(inputs_obj)
+            
+            // convert output to json and print
+            var output_json = JSON.stringify(output_obj)
+            var result = `<<RESULT>>${{output_json}}<<RESULT>>`
+            console.log(result)
+            """)
+        return runner_script

+ 48 - 92
api/core/helper/code_executor/jinja2/jinja2_transformer.py

@@ -1,94 +1,13 @@
-import json
-import re
-from base64 import b64encode
-from typing import Optional
+from textwrap import dedent
 
-from core.helper.code_executor.entities import CodeDependency
-from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES
+from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
 from core.helper.code_executor.template_transformer import TemplateTransformer
 
-PYTHON_RUNNER = """
-import jinja2
-from json import loads
-from base64 import b64decode
-
-template = jinja2.Template('''{{code}}''')
-
-def main(**inputs):
-    return template.render(**inputs)
-
-# execute main function, and return the result
-inputs = b64decode('{{inputs}}').decode('utf-8')
-output = main(**loads(inputs))
-
-result = f'''<<RESULT>>{output}<<RESULT>>'''
-
-print(result)
-
-"""
-
-JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %}
-{{ 'a' }}
-{% for fruit in fruits %}
-    <li>{{ fruit }}</li>
-{% endfor %}
-{% if fruits|length > 1 %}
-1
-{% endif %}
-{% for i in range(5) %}
-    {% if i == 3 %}{{ i }}{% else %}{% endif %}
-{% endfor %}
-    {% for i in range(3) %}
-        {{ i + 1 }}
-    {% endfor %}
-{% macro say_hello() %}a{{ 'b' }}{% endmacro %}
-{{ s }}{{ say_hello() }}"""
-
-JINJA2_PRELOAD = f"""
-import jinja2
-from base64 import b64decode
-
-def _jinja2_preload_():
-    # prepare jinja2 environment, load template and render before to avoid sandbox issue
-    template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''')
-    template.render(s='a')
-
-if __name__ == '__main__':
-    _jinja2_preload_()
-
-"""
-
 
 class Jinja2TemplateTransformer(TemplateTransformer):
     @classmethod
-    def transform_caller(cls, code: str, inputs: dict, 
-                         dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
-        """
-        Transform code to python runner
-        :param code: code
-        :param inputs: inputs
-        :return:
-        """
-
-        inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
-
-        # transform jinja2 template to python code
-        runner = PYTHON_RUNNER.replace('{{code}}', code)
-        runner = runner.replace('{{inputs}}', inputs_str)
-
-        if not dependencies:
-            dependencies = []
-
-        # add native packages and jinja2
-        for package in PYTHON_STANDARD_PACKAGES.union(['jinja2']):
-            dependencies.append(CodeDependency(name=package, version=''))
-
-        # deduplicate
-        dependencies = list({
-            dep.name: dep for dep in dependencies if dep.name
-        }.values())
-
-        return runner, JINJA2_PRELOAD, dependencies
+    def get_standard_packages(cls) -> set[str]:
+        # jinja2 itself plus every package the Python 3 transformer reports as standard
+        return {'jinja2'} | Python3TemplateTransformer.get_standard_packages()
 
     @classmethod
     def transform_response(cls, response: str) -> dict:
@@ -97,12 +16,49 @@ class Jinja2TemplateTransformer(TemplateTransformer):
         :param response: response
         :return:
         """
-        # extract result
-        result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
-        if not result:
-            raise ValueError('Failed to parse result')
-        result = result.group(1)
-
         return {
-            'result': result
+            'result': cls.extract_result_str_from_response(response)
         }
+
+    @classmethod
+    def get_runner_script(cls) -> str:
+        """
+        Get the Python runner script template that renders a Jinja2 template
+        :return: script containing the code and inputs placeholders; it prints the
+                 rendered text between two <<RESULT>> tags
+        """
+        # NOTE(review): the template text is pasted raw between ''' quotes, so a
+        # template that itself contains ''' would presumably break the script - confirm
+        runner_script = dedent(f"""
+            # declare main function
+            def main(**inputs):
+                import jinja2
+                template = jinja2.Template('''{cls._code_placeholder}''')
+                return template.render(**inputs)
+                
+            import json
+            from base64 import b64decode
+            
+            # decode and prepare input dict
+            inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
+            
+            # execute main function
+            output = main(**inputs_obj)
+            
+            # convert output and print
+            result = f'''<<RESULT>>{{output}}<<RESULT>>'''
+            print(result)
+            
+            """)
+        return runner_script
+
+    @classmethod
+    def get_preload_script(cls) -> str:
+        """
+        Get the preload script for Jinja2
+
+        The script imports jinja2 and renders a one-variable template once when
+        run as the main module.
+        :return: preload script
+        """
+        preload_script = dedent("""
+            import jinja2
+            from base64 import b64decode
+            
+            def _jinja2_preload_():
+                # prepare jinja2 environment, load template and render before to avoid sandbox issue
+                template = jinja2.Template('{{s}}')
+                template.render(s='a')
+            
+            if __name__ == '__main__':
+                _jinja2_preload_()
+            
+            """)
+
+        return preload_script

+ 42 - 74
api/core/helper/code_executor/python3/python3_transformer.py

@@ -1,83 +1,51 @@
-import json
-import re
-from base64 import b64encode
 from textwrap import dedent
-from typing import Optional
 
-from core.helper.code_executor.entities import CodeDependency
 from core.helper.code_executor.template_transformer import TemplateTransformer
 
-PYTHON_RUNNER = dedent("""
-# declare main function here
-{{code}}
-
-from json import loads, dumps
-from base64 import b64decode
-
-# execute main function, and return the result
-# inputs is a dict, and it
-inputs = b64decode('{{inputs}}').decode('utf-8')
-output = main(**json.loads(inputs))
-
-# convert output to json and print
-output = dumps(output, indent=4)
-
-result = f'''<<RESULT>>
-{output}
-<<RESULT>>'''
-
-print(result)
-""")
-
-PYTHON_PRELOAD = """"""
-
-PYTHON_STANDARD_PACKAGES = {
-    'json', 'datetime', 'math', 'random', 're', 'string', 'sys', 'time', 'traceback', 'uuid', 'os', 'base64',
-    'hashlib', 'hmac', 'binascii', 'collections', 'functools', 'operator', 'itertools', 'uuid',
-}
-
 
 class Python3TemplateTransformer(TemplateTransformer):
     @classmethod
-    def transform_caller(cls, code: str, inputs: dict, 
-                         dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
-        """
-        Transform code to python runner
-        :param code: code
-        :param inputs: inputs
-        :return:
-        """
-        
-        # transform inputs to json string
-        inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
-
-        # replace code and inputs
-        runner = PYTHON_RUNNER.replace('{{code}}', code)
-        runner = runner.replace('{{inputs}}', inputs_str)
-
-        # add standard packages
-        if dependencies is None:
-            dependencies = []
-
-        for package in PYTHON_STANDARD_PACKAGES:
-            if package not in dependencies:
-                dependencies.append(CodeDependency(name=package, version=''))
-
-        # deduplicate
-        dependencies = list({dep.name: dep for dep in dependencies if dep.name}.values())
+    def get_standard_packages(cls) -> set[str]:
+        # package names this transformer reports as standard for Python 3,
+        # kept in alphabetical order
+        return {
+            'base64',
+            'binascii',
+            'collections',
+            'datetime',
+            'functools',
+            'hashlib',
+            'hmac',
+            'itertools',
+            'json',
+            'math',
+            'operator',
+            'os',
+            'random',
+            're',
+            'string',
+            'sys',
+            'time',
+            'traceback',
+            'uuid',
+        }
 
-        return runner, PYTHON_PRELOAD, dependencies
-    
     @classmethod
-    def transform_response(cls, response: str) -> dict:
-        """
-        Transform response to dict
-        :param response: response
-        :return:
-        """
-        # extract result
-        result = re.search(r'<<RESULT>>(.*?)<<RESULT>>', response, re.DOTALL)
-        if not result:
-            raise ValueError('Failed to parse result')
-        result = result.group(1)
-        return json.loads(result)
+    def get_runner_script(cls) -> str:
+        """
+        Get the Python 3 runner script template
+        :return: script containing the code and inputs placeholders; it calls main
+                 with the decoded inputs as keyword arguments and prints the
+                 JSON-encoded return value between two <<RESULT>> tags
+        """
+        runner_script = dedent(f"""
+            # declare main function
+            {cls._code_placeholder}
+            
+            import json
+            from base64 import b64decode
+            
+            # decode and prepare input dict
+            inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
+            
+            # execute main function
+            output_obj = main(**inputs_obj)
+            
+            # convert output to json and print
+            output_json = json.dumps(output_obj, indent=4)
+            result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
+            print(result)
+            """)
+        return runner_script

+ 66 - 7
api/core/helper/code_executor/template_transformer.py

@@ -1,13 +1,25 @@
+import json
+import re
 from abc import ABC, abstractmethod
+from base64 import b64encode
 from typing import Optional
 
+from pydantic import BaseModel
+
 from core.helper.code_executor.entities import CodeDependency
 
 
-class TemplateTransformer(ABC):
+class TemplateTransformer(ABC, BaseModel):
+    _code_placeholder: str = '{{code}}'
+    _inputs_placeholder: str = '{{inputs}}'
+    _result_tag: str = '<<RESULT>>'
+
     @classmethod
-    @abstractmethod
-    def transform_caller(cls, code: str, inputs: dict, 
+    def get_standard_packages(cls) -> set[str]:
+        return set()
+
+    @classmethod
+    def transform_caller(cls, code: str, inputs: dict,
                          dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
         """
         Transform code to python runner
@@ -15,14 +27,61 @@ class TemplateTransformer(ABC):
         :param inputs: inputs
         :return: runner, preload
         """
-        pass
-    
+        runner_script = cls.assemble_runner_script(code, inputs)
+        preload_script = cls.get_preload_script()
+
+        packages = dependencies or []
+        standard_packages = cls.get_standard_packages()
+        for package in standard_packages:
+            if package not in packages:
+                packages.append(CodeDependency(name=package, version=''))
+        packages = list({dep.name: dep for dep in packages if dep.name}.values())
+
+        return runner_script, preload_script, packages
+
+    @classmethod
+    def extract_result_str_from_response(cls, response: str) -> str:
+        """
+        Extract the text enclosed by the result tags
+        :param response: raw output of the executed script
+        :return: text between the first and the last result tag (greedy match)
+        """
+        tag = cls._result_tag
+        matched = re.search(rf'{tag}(.*){tag}', response, re.DOTALL)
+        if matched is None:
+            raise ValueError('Failed to parse result')
+        return matched.group(1)
+
     @classmethod
-    @abstractmethod
     def transform_response(cls, response: str) -> dict:
         """
         Transform response to dict
         :param response: response
         :return:
         """
-        pass
+        return json.loads(cls.extract_result_str_from_response(response))
+
+    @classmethod
+    @abstractmethod
+    def get_runner_script(cls) -> str:
+        """
+        Get runner script
+
+        Abstract hook; the returned template has its code and inputs
+        placeholders substituted by assemble_runner_script.
+        :return: runner script template
+        """
+        pass
+
+    @classmethod
+    def serialize_inputs(cls, inputs: dict) -> str:
+        """
+        Serialize the inputs to base64 text
+        :param inputs: inputs
+        :return: base64 encoding of the JSON form of the inputs (non-ASCII kept as-is)
+        """
+        payload = json.dumps(inputs, ensure_ascii=False).encode()
+        return b64encode(payload).decode('utf-8')
+
+    @classmethod
+    def assemble_runner_script(cls, code: str, inputs: dict) -> str:
+        """
+        Fill the runner script template with the user code and the encoded inputs
+        :param code: user code inserted at the code placeholder
+        :param inputs: inputs, encoded with serialize_inputs
+        :return: runner script with both placeholders substituted
+        """
+        script = cls.get_runner_script()
+        # substitute the inputs first: the base64 text can never contain a
+        # placeholder, while user code (e.g. a Jinja2 template using `{{inputs}}`)
+        # can, and must not be rewritten by a later replacement
+        inputs_str = cls.serialize_inputs(inputs)
+        script = script.replace(cls._inputs_placeholder, inputs_str)
+        script = script.replace(cls._code_placeholder, code)
+        return script
+
+    @classmethod
+    def get_preload_script(cls) -> str:
+        """
+        Get preload script
+
+        The base implementation has nothing to preload.
+        :return: empty string
+        """
+        return ''

+ 10 - 1
api/tests/integration_tests/workflow/nodes/code_executor/test_code_javascript.py

@@ -2,6 +2,7 @@ from textwrap import dedent
 
 from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
 from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider
+from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
 
 CODE_LANGUAGE = CodeLanguage.JAVASCRIPT
 
@@ -23,7 +24,8 @@ def test_javascript_json():
 
 def test_javascript_with_code_template():
     result = CodeExecutor.execute_workflow_code_template(
-        language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(), inputs={'arg1': 'Hello', 'arg2': 'World'})
+        language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(),
+        inputs={'arg1': 'Hello', 'arg2': 'World'})
     assert result == {'result': 'HelloWorld'}
 
 
@@ -32,3 +34,10 @@ def test_javascript_list_default_available_packages():
 
     # no default packages available for javascript
     assert len(packages) == 0
+
+
+def test_javascript_get_runner_script():
+    """The Node.js runner template holds each placeholder once and the result tag twice."""
+    runner_script = NodeJsTemplateTransformer.get_runner_script()
+    assert runner_script.count(NodeJsTemplateTransformer._code_placeholder) == 1
+    assert runner_script.count(NodeJsTemplateTransformer._inputs_placeholder) == 1
+    assert runner_script.count(NodeJsTemplateTransformer._result_tag) == 2

+ 14 - 3
api/tests/integration_tests/workflow/nodes/code_executor/test_code_jinja2.py

@@ -1,7 +1,7 @@
 import base64
 
 from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
-from core.helper.code_executor.jinja2.jinja2_transformer import JINJA2_PRELOAD, PYTHON_RUNNER
+from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
 
 CODE_LANGUAGE = CodeLanguage.JINJA2
 
@@ -9,8 +9,12 @@ CODE_LANGUAGE = CodeLanguage.JINJA2
 def test_jinja2():
     template = 'Hello {{template}}'
     inputs = base64.b64encode(b'{"template": "World"}').decode('utf-8')
-    code = PYTHON_RUNNER.replace('{{code}}', template).replace('{{inputs}}', inputs)
-    result = CodeExecutor.execute_code(language=CODE_LANGUAGE, preload=JINJA2_PRELOAD, code=code)
+    code = (Jinja2TemplateTransformer.get_runner_script()
+            .replace(Jinja2TemplateTransformer._code_placeholder, template)
+            .replace(Jinja2TemplateTransformer._inputs_placeholder, inputs))
+    result = CodeExecutor.execute_code(language=CODE_LANGUAGE,
+                                       preload=Jinja2TemplateTransformer.get_preload_script(),
+                                       code=code)
     assert result == '<<RESULT>>Hello World<<RESULT>>\n'
 
 
@@ -18,3 +22,10 @@ def test_jinja2_with_code_template():
     result = CodeExecutor.execute_workflow_code_template(
         language=CODE_LANGUAGE, code='Hello {{template}}', inputs={'template': 'World'})
     assert result == {'result': 'Hello World'}
+
+
+def test_jinja2_get_runner_script():
+    """The Jinja2 runner template holds each placeholder once and the result tag twice."""
+    runner_script = Jinja2TemplateTransformer.get_runner_script()
+    assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
+    assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
+    assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2

+ 8 - 0
api/tests/integration_tests/workflow/nodes/code_executor/test_code_python3.py

@@ -3,6 +3,7 @@ from textwrap import dedent
 
 from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
 from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider
+from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
 
 CODE_LANGUAGE = CodeLanguage.PYTHON3
 
@@ -35,3 +36,10 @@ def test_python3_list_default_available_packages():
 
     # check JSON serializable
     assert len(str(json.dumps(packages))) > 0
+
+
+def test_python3_get_runner_script():
+    """The Python 3 runner template holds each placeholder once and the result tag twice."""
+    runner_script = Python3TemplateTransformer.get_runner_script()
+    assert runner_script.count(Python3TemplateTransformer._code_placeholder) == 1
+    assert runner_script.count(Python3TemplateTransformer._inputs_placeholder) == 1
+    assert runner_script.count(Python3TemplateTransformer._result_tag) == 2