diff --git a/docs/ai_utils.rst b/docs/ai_utils.rst index 7bcd7931..b672e917 100644 --- a/docs/ai_utils.rst +++ b/docs/ai_utils.rst @@ -76,8 +76,8 @@ For SpaCy, you also need to download the language model, i.e. for small English python -m spacy download en_core_web_sm -For OpenAI LLM, you need to set up your configuration in environment variables, that it may depend on the type of access -you have (direct OpenAI access or Azure OpenAI): +For OpenAI LLM, you need to set up your configuration using environment variables or include it in the Toolium configuration. +The required parameters will vary depending on your access type (direct OpenAI or Azure OpenAI): .. code-block:: bash @@ -91,6 +91,110 @@ you have (direct OpenAI access or Azure OpenAI): AZURE_OPENAI_ENDPOINT= OPENAI_API_VERSION= +Toolium configuration, *[AI]* section:: + + [AI] + text_similarity_method: azure_openai + azure_endpoint: https://your-endpoint.azure.com + api_version: 2025-01-01-preview + azure_deployment: gpt-4o-mini + + +Text Criteria Analysis +---------------------- + +Text criteria analysis evaluates how well an input text matches a set of target characteristics +(e.g., tone, style, clarity, domain vocabulary, specific described content, etc.) using an LLM. +Toolium provides utilities to both retrieve a structured analysis and assert quality thresholds. + +Usage +~~~~~ + +You can use functions from the `toolium.utils.ai_utils.text_analysis` module: + +* **get_text_criteria_analysis()**: returns a JSON string with an overall score and low-scored criteria. +* **assert_text_criteria()**: validates that the overall score is at least the given threshold and raises `AssertionError` otherwise. + +.. code-block:: python + + from toolium.utils.ai_utils.text_analysis import get_text_criteria_analysis, assert_text_criteria + import json + + input_text = "Hey team! Quick heads-up: deployment is done, all checks green."
+ text_criteria = [ + "professional tone", + "clear and concise message", + "it includes the status of the deployment" + ] + + # Get analysis (JSON string) + raw_analysis = get_text_criteria_analysis( + text_input=input_text, + text_criteria=text_criteria, + model_name="gpt-4o-mini", # optional + azure=False # True for Azure OpenAI + ) + + analysis = json.loads(raw_analysis) + print(analysis["overall_match"]) + print(analysis["features"]) # Only low-scored features (<= 0.2) + +.. code-block:: python + + from toolium.utils.ai_utils.text_analysis import assert_text_criteria + + # Assert text quality against a minimum score + assert_text_criteria( + text_input="This message should be short, formal and informative.", + text_criteria=["formal tone", "brevity", "informative content"], + threshold=0.75, + model_name="gpt-4o-mini", # optional + azure=False # set True for Azure OpenAI + ) + + +Configuration +~~~~~~~~~~~~~ + +Default text analysis method can be set in the properties.cfg file with the property *text_analysis_method* in +*[AI]* section:: + + [AI] + text_analysis_method: openai # Options: 'openai', 'azure_openai' (default: azure_openai) + + +Response Format +~~~~~~~~~~~~~~~ + +`get_text_criteria_analysis()` expects the LLM to return a JSON object with: + +.. 
code-block:: json + + { + "overall_match": 0.82, + "features": [ + { + "name": "formal tone", + "score": 0.15 + } + ] + } + +Where: + +- `overall_match`: float in `[0.0, 1.0]` +- `features`: list containing only low-scored criteria (score `<= 0.2`) + +Assertion Behavior +~~~~~~~~~~~~~~~~~~ + +`assert_text_criteria()`: + +- parses the JSON response returned by `get_text_criteria_analysis()` +- compares `overall_match` with the provided `threshold` +- raises `AssertionError` if `overall_match < threshold` +- logs low-scored features to help diagnose why validation failed + Text Readability ---------------- diff --git a/toolium/utils/ai_utils/openai.py b/toolium/utils/ai_utils/openai.py index 0974f16c..3677e2d3 100644 --- a/toolium/utils/ai_utils/openai.py +++ b/toolium/utils/ai_utils/openai.py @@ -51,6 +51,16 @@ def openai_request(system_message, user_message, model_name=None, azure=False, * response_format = kwargs.pop('response_format') kwargs.pop('response_format', None) client = AzureOpenAI(**kwargs) if azure else OpenAI(**kwargs) + if azure: + for key in ('azure_api_key', 'azure_endpoint', 'api_version', 'azure_deployment'): + value = config.get_optional('AI', key) + if value: + kwargs.setdefault(key, value) + else: + for key in ('openai_api_key', 'openai_temperature'): + value = config.get_optional('AI', key) + if value: + kwargs.setdefault(key, value) messages = [] if isinstance(system_message, list): for prompt in system_message: