From 78561f95ed6d98c69a717c9b0b5500074e344f59 Mon Sep 17 00:00:00 2001 From: Ludovico Mitchener Date: Wed, 19 Nov 2025 14:20:17 -0800 Subject: [PATCH] Edison Analysis Tutorial --- docs/edison_analysis_tutorial.ipynb | 252 ++++++++++++++++++++++++++++ docs/edison_analysis_tutorial.md | 178 ++++++++++++++++++++ 2 files changed, 430 insertions(+) create mode 100644 docs/edison_analysis_tutorial.ipynb create mode 100644 docs/edison_analysis_tutorial.md diff --git a/docs/edison_analysis_tutorial.ipynb b/docs/edison_analysis_tutorial.ipynb new file mode 100644 index 0000000..8dea78c --- /dev/null +++ b/docs/edison_analysis_tutorial.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Edison Analysis API Tutorial\n", + "\n", + "This notebook provides you with an example use case for using `Edison Analysis` to perform data analysis.\n", + "\n", + "The only dependency you need to follow along is `edison-client` which you can install via pip:\n", + "\n", + "```bash\n", + "pip install edison-client\n", + "```\n", + "\n", + "We recommend reading the edison client [docs](https://pypi.org/project/edison-client/) before following this tutorial.\n", + "\n", + "To run an `Edison Analysis` job you should take the following steps:\n", + "\n", + "1. Upload any artifacts to the data storage service\n", + "2. Start an `Edison Analysis` run using the Edison client passing the data storage entry ids\n", + " along with any other details in the task config\n", + "3. 
Use the output of the task to obtain any data generated by the task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "\n", + "from edison_client import EdisonClient\n", + "from edison_client.models import RuntimeConfig, TaskRequest\n", + "from edison_client.models.app import JobNames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the Edison client with your API key created via the platform\n", + "EDISON_API_KEY = \"\" # Add your API key here\n", + "client = EdisonClient(api_key=EDISON_API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## File management with Edison Analysis\n", + "\n", + "`Edison Analysis` is designed to run data analysis on files provided by the user or caller. To provide `Edison Analysis` with this data, \n", + "you'll need to upload it to the Edison data storage service. This service is your one stop shop for sharing, storing and\n", + "updating data to be used in the Edison ecosystem." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading a single file to the data storage service\n", + "single_file_upload_response = await client.astore_file_content(\n", + " name=\"Demo file entry for a single file\",\n", + " file_path=\"./datasets/brain_size_data.csv\", # ADD DATASET PATH HERE\n", + " description=\"This is a test file that will be be analysed by Edison Analysis\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uploading a directory to the data storage service\n", + "directory_upload_response = await client.astore_file_content(\n", + " name=\"Demo file entry for a whole directory\",\n", + " file_path=\"./datasets\", # ADD DATASET FOLDER PATH HERE\n", + " description=\"This is a directory that will be be analysed by Edison Analysis\",\n", + " as_collection=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running Your Job\n", + "\n", + "When running an `Edison Analysis` job there are some considerations to take with how you configure the agent. The first things \n", + "to note are the core configuration settings like `language`, `max_steps` and `query`. In addition to these core settings you have some\n", + "other options too. 
The key ones are listed below:\n", + "\n", + "### Additional tools available:\n", + "- `query_ensembl`: query the Ensembl database\n", + "- `get_convert_gene`: for converting gene IDs from one type to another, for example Ensembl, Entrez, Refseq.\n", + "- `search_web`: expose exa.ai (/search) web search as a tool\n", + "- `crawl_web`: expose exa.ai (/contents) web crawl as a tool\n", + "- `research_web`: expose exa.ai (/research) web research as a tool\n", + "- `query_literature`: allow `Edison Analysis` to do calls to `Edison Literature` for literature search\n", + "\n", + "- You can add tool-usage instructions to either the user or system prompt. For example: \"Use the query_literature tool to compare your findings against published literature.\"\n", + "\n", + "### Modifying system prompt\n", + "There are two options to modify the system prompt:\n", + "1. Replace the existing system prompt completely using `prompting_config[\"system_prompt\"]`\n", + "2. Append additional guidelines to the existing system prompt using `prompting_config[\"system_prompt_additional_guidelines\"]`\n", + "\n", + "Build the `prompting_config` dictionary then assign it to the `\"prompting_config\"` key within `environment_config`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define your task\n", + "USER_QUERY = \"Teach me something new about crows.\" # The actual query you want `Edison Analysis` to run\n", + "SYSTEM_PROMPT = \"\" # By setting this, you will replace the system prompt entirely.\n", + "SYSTEM_PROMPT_ADDITIONAL_GUIDELINES = (\n", + " \"Make all figures in dark mode.\" # This will be appended to the system prompt\n", + ")\n", + "_SYSTEM_PROMPT_CONFIG = {\n", + " \"system_prompt\": SYSTEM_PROMPT,\n", + " \"system_prompt_additional_guidelines\": SYSTEM_PROMPT_ADDITIONAL_GUIDELINES,\n", + "}\n", + "LANGUAGE = \"PYTHON\" # Choose between \"R\" and \"PYTHON\"\n", + "MAX_STEPS = 30 # You can change this to impose a limit on the 
number of steps the agent can take" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a task\n", + "task_data = TaskRequest(\n", + " name=JobNames.ANALYSIS,\n", + " query=USER_QUERY,\n", + " runtime_config=RuntimeConfig(\n", + " max_steps=MAX_STEPS,\n", + " environment_config={\n", + " \"language\": LANGUAGE,\n", + " \"prompting_config\": {\n", + " k: v for k, v in _SYSTEM_PROMPT_CONFIG.items() if v\n", + " }, # See above for documentation\n", + " \"data_storage_uris\": [\n", + " f\"data_entry:{directory_upload_response.data_storage.id}\"\n", + " ],\n", + " \"additional_tools\": None, # See above for options\n", + " },\n", + " ),\n", + ")\n", + "trajectory_id = client.create_task(task_data)\n", + "print(\n", + " f\"Task running on platform, you can view progress live at:https://platform.edisonscientific.com/trajectories/{trajectory_id}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Jobs take on average 3-10 minutes to complete\n", + "# We also have inbuilt support for polling, asynchronous tasks and other utilities documented here:\n", + "# https://edisonscientific.gitbook.io/edison-cookbook/edison-client\n", + "status = \"in progress\"\n", + "while status in {\"in progress\", \"queued\"}:\n", + " status = client.get_task(trajectory_id).status\n", + " time.sleep(15)\n", + "\n", + "if status == \"failed\":\n", + " raise RuntimeError(\"Task failed\")\n", + "\n", + "job_result = client.get_task(trajectory_id, verbose=True)\n", + "answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n", + "print(f\"The agent's answer to your research question is: \\n{answer}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download Task Output\n", + "\n", + "While the task is executing it will create some artifacts. 
First the notebook, \n", + "which is where the analysis code will be written, and any other artifacts created during the task.\n", + "\n", + "Once the task has completed you may want to check the contents of the notebook or look through the artifacts generated. \n", + "To obtain these artifacts, you will need to inspect the output of the agent's final `environment_frame`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output_data = job_result.environment_frame[\"state\"][\"info\"][\"output_data\"]\n", + "print(output_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for output_file in output_data:\n", + " download_response = await client.afetch_data_from_storage(\n", + " data_storage_id=output_file[\"entry_id\"]\n", + " )\n", + "\n", + " # Note there are two potential outcomes here. One where the client downloads\n", + " # the file to your local filesystem if it's above ~10MB. 
The second is where\n", + " # it will return a RawFetchResponse object which contains the raw content.\n", + " print(download_response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/edison_analysis_tutorial.md b/docs/edison_analysis_tutorial.md new file mode 100644 index 0000000..bf4c50f --- /dev/null +++ b/docs/edison_analysis_tutorial.md @@ -0,0 +1,178 @@ +--- +jupyter: + jupytext: + text_representation: + extension: .md + format_name: markdown + format_version: '1.3' + jupytext_version: 1.16.7 + kernelspec: + display_name: .venv + language: python + name: python3 +--- + + +# Edison Analysis API Tutorial + +This notebook provides you with an example use case for using `Edison Analysis` to perform data analysis. + +The only dependency you need to follow along is `edison-client` which you can install via pip: + +```bash +pip install edison-client +``` + +We recommend reading the edison client [docs](https://pypi.org/project/edison-client/) before following this tutorial. + +To run an `Edison Analysis` job you should take the following steps: + +1. Upload any artifacts to the data storage service +2. Start an `Edison Analysis` run using the Edison client passing the data storage entry ids + along with any other details in the task config +3. 
Use the output of the task to obtain any data generated by the task + + +```python +import time + +from edison_client import EdisonClient +from edison_client.models import RuntimeConfig, TaskRequest +from edison_client.models.app import JobNames +``` + +```python +# Instantiate the Edison client with your API key created via the platform +EDISON_API_KEY = "" # Add your API key here +client = EdisonClient(api_key=EDISON_API_KEY) +``` + +## File management with Edison Analysis + +`Edison Analysis` is designed to run data analysis on files provided by the user or caller. To provide `Edison Analysis` with this data, +you'll need to upload it to the Edison data storage service. This service is your one-stop shop for sharing, storing and +updating data to be used in the Edison ecosystem. + +```python +# Uploading a single file to the data storage service +single_file_upload_response = await client.astore_file_content( + name="Demo file entry for a single file", + file_path="./datasets/brain_size_data.csv", # ADD DATASET PATH HERE + description="This is a test file that will be be analysed by Edison Analysis", +) +``` + +```python +# Uploading a directory to the data storage service +directory_upload_response = await client.astore_file_content( + name="Demo file entry for a whole directory", + file_path="./datasets", # ADD DATASET FOLDER PATH HERE + description="This is a directory that will be be analysed by Edison Analysis", + as_collection=True, +) +``` + +## Running Your Job + +When running an `Edison Analysis` job there are some considerations to take with how you configure the agent. The first things +to note are the core configuration settings like `language`, `max_steps` and `query`. In addition to these core settings you have some +other options too. 
The key ones are listed below: + +### Additional tools available: +- `query_ensembl`: query the Ensembl database +- `get_convert_gene`: for converting gene IDs from one type to another, for example Ensembl, Entrez, Refseq. +- `search_web`: expose exa.ai (/search) web search as a tool +- `crawl_web`: expose exa.ai (/contents) web crawl as a tool +- `research_web`: expose exa.ai (/research) web research as a tool +- `query_literature`: allow `Edison Analysis` to do calls to `Edison Literature` for literature search + +- You can add tool-usage instructions to either the user or system prompt. For example: "Use the query_literature tool to compare your findings against published literature." + +### Modifying system prompt +There are two options to modify the system prompt: +1. Replace the existing system prompt completely using `prompting_config["system_prompt"]` +2. Append additional guidelines to the existing system prompt using `prompting_config["system_prompt_additional_guidelines"]` + +Build the `prompting_config` dictionary then assign it to the `"prompting_config"` key within `environment_config` + +```python +# Define your task +USER_QUERY = "Teach me something new about crows." # The actual query you want `Edison Analysis` to run +SYSTEM_PROMPT = "" # By setting this, you will replace the system prompt entirely. +SYSTEM_PROMPT_ADDITIONAL_GUIDELINES = ( + "Make all figures in dark mode." 
# This will be appended to the system prompt +) +_SYSTEM_PROMPT_CONFIG = { + "system_prompt": SYSTEM_PROMPT, + "system_prompt_additional_guidelines": SYSTEM_PROMPT_ADDITIONAL_GUIDELINES, +} +LANGUAGE = "PYTHON" # Choose between "R" and "PYTHON" +MAX_STEPS = 30 # You can change this to impose a limit on the number of steps the agent can take +``` + +```python +# Create a task +task_data = TaskRequest( + name=JobNames.ANALYSIS, + query=USER_QUERY, + runtime_config=RuntimeConfig( + max_steps=MAX_STEPS, + environment_config={ + "language": LANGUAGE, + "prompting_config": { + k: v for k, v in _SYSTEM_PROMPT_CONFIG.items() if v + }, # See above for documentation + "data_storage_uris": [ + f"data_entry:{directory_upload_response.data_storage.id}" + ], + "additional_tools": None, # See above for options + }, + ), +) +trajectory_id = client.create_task(task_data) +print( + f"Task running on platform, you can view progress live at:https://platform.edisonscientific.com/trajectories/{trajectory_id}" +) +``` + +```python +# Jobs take on average 3-10 minutes to complete +# We also have inbuilt support for polling, asynchronous tasks and other utilities documented here: +# https://edisonscientific.gitbook.io/edison-cookbook/edison-client +status = "in progress" +while status in {"in progress", "queued"}: + status = client.get_task(trajectory_id).status + time.sleep(15) + +if status == "failed": + raise RuntimeError("Task failed") + +job_result = client.get_task(trajectory_id, verbose=True) +answer = job_result.environment_frame["state"]["state"]["answer"] +print(f"The agent's answer to your research question is: \n{answer}") +``` + +## Download Task Output + +While the task is executing it will create some artifacts. First the notebook, +which is where the analysis code will be written, and any other artifacts created during the task. + +Once the task has completed you may want to check the contents of the notebook or look through the artifacts generated. 
+To obtain these artifacts, you will need to inspect the output of the agent's final `environment_frame` + +```python +output_data = job_result.environment_frame["state"]["info"]["output_data"] +print(output_data) +``` + +```python +for output_file in output_data: + download_response = await client.afetch_data_from_storage( + data_storage_id=output_file["entry_id"] + ) + + # Note there are two potential outcomes here. One where the client downloads + # the file to your local filesystem if it's above ~10MB. The second is where + # it will return a RawFetchResponse object which contains the raw content. + print(download_response) +```