Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
975 changes: 975 additions & 0 deletions notebooks/how_to/qualitative_text/qualitative_text_generation.ipynb
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The content_id for model_overview_text should be model_overview, otherwise you would not be reusing the pre-existing block:

Image

Similarly, I get an error for:

vm.run_text_generation(
    content_id="dataset_description_text",
    context={"content_ids": vm.get_content_ids("data_description")},
).log()

Error:

SectionNotFoundError: Section for content dataset_description_text not found
[NOTE] During task with name 'qualitative_text_generation' and id '028072b6-3431-e084-5f94-a0f6ec353894'

The correct content_id should be dataset_summary_text:

Image

Similarly, the last cell should be fixed so it can run on the current version of the churn template, without modifications. This will require 2 changes:

  • Using the correct content_ids i.e. model_overview instead of model_overview_text
  • Adding a small improvement to the current feature (I know it adds a bit of scope) so that we can pre-assign a content block if the given content_id does not exist in the template. For example, the block intended_use_text does not exist in the template, so we should be able to specify the section_id where it should be appended. Note that this is already supported by test results blocks, e.g.:
perf_comparison_result.log(section_id="model_evaluation")
roc_curve_result.log(section_id="model_evaluation")

For the case of intended_use_text we could call .log(section_id="intended_use").

With these changes in place we should be able to populate 100% of the churn document with this notebook.

Large diffs are not rendered by default.

146 changes: 145 additions & 1 deletion tests/test_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
import validmind.api_client as api_client
from validmind.__version__ import __version__
from validmind.errors import (
APIRequestError,
MissingAPICredentialsError,
MissingModelIdError,
APIRequestError,
)
from validmind.utils import md_to_html
from validmind.vm_models.figure import Figure


Expand Down Expand Up @@ -225,6 +226,27 @@ def test_log_metadata(self, mock_post: MagicMock):
),
)

@patch("aiohttp.ClientSession.post")
def test_log_metadata_with_section_id(self, mock_post: MagicMock):
mock_post.return_value = MockAsyncResponse(200, json={"cuid": "abc1234"})

self.run_async(
api_client.alog_metadata,
"1234",
text="Some Text",
section_id="intended_use",
)

mock_post.assert_called_with(
f"{os.environ['VM_API_HOST']}/log_metadata?section_id=intended_use",
data=json.dumps(
{
"content_id": "1234",
"text": "Some Text",
}
),
)

@patch("aiohttp.ClientSession.post")
def test_log_test_result(self, mock_post):
result = {
Expand All @@ -245,6 +267,128 @@ def test_log_test_result(self, mock_post):

mock_post.assert_called_with(url, data=json.dumps(result))

@patch("requests.post")
@patch("aiohttp.ClientSession.post")
def test_log_text_generates_text_and_logs_metadata(
self, mock_aiohttp_post, mock_requests_post
):
mock_requests_post.return_value = Mock(status_code=200)
mock_requests_post.return_value.json.return_value = {
"content": "## Generated Summary\nGenerated content."
}
mock_aiohttp_post.return_value = MockAsyncResponse(
200,
json={
"content_id": "dataset_summary_text",
"text": md_to_html("## Generated Summary\nGenerated content.", mathml=True),
},
)

api_client.log_text(
content_id="dataset_summary_text",
prompt="Summarize the dataset.",
context={"content_ids": ["train_dataset", "target_description_text"]},
)

mock_requests_post.assert_called_once_with(
url=f"{os.environ['VM_API_HOST']}/ai/generate/qualitative_text_generation",
headers={
"X-API-KEY": os.environ["VM_API_KEY"],
"X-API-SECRET": os.environ["VM_API_SECRET"],
"X-MODEL-CUID": os.environ["VM_API_MODEL"],
"X-MONITORING": "False",
"X-LIBRARY-VERSION": __version__,
},
json={
"content_id": "dataset_summary_text",
"generate": True,
"prompt": "Summarize the dataset.",
"context": {
"content_ids": ["train_dataset", "target_description_text"]
},
},
)
mock_aiohttp_post.assert_called_once_with(
f"{os.environ['VM_API_HOST']}/log_metadata",
data=json.dumps(
{
"content_id": "dataset_summary_text",
"text": md_to_html(
"## Generated Summary\nGenerated content.", mathml=True
),
}
),
)

@patch("requests.post")
@patch("aiohttp.ClientSession.post")
def test_log_text_logs_metadata_with_section_id(
self, mock_aiohttp_post, mock_requests_post
):
mock_requests_post.return_value = Mock(status_code=200)
mock_requests_post.return_value.json.return_value = {
"content": "Generated content."
}
mock_aiohttp_post.return_value = MockAsyncResponse(
200,
json={
"content_id": "dataset_summary_text",
"text": "Generated content.",
},
)

api_client.log_text(
content_id="dataset_summary_text",
prompt="Summarize the dataset.",
section_id="intended_use",
)

mock_requests_post.assert_called_once_with(
url=f"{os.environ['VM_API_HOST']}/ai/generate/qualitative_text_generation",
headers={
"X-API-KEY": os.environ["VM_API_KEY"],
"X-API-SECRET": os.environ["VM_API_SECRET"],
"X-MODEL-CUID": os.environ["VM_API_MODEL"],
"X-MONITORING": "False",
"X-LIBRARY-VERSION": __version__,
},
json={
"content_id": "dataset_summary_text",
"generate": True,
"prompt": "Summarize the dataset.",
"section_id": "intended_use",
},
)
mock_aiohttp_post.assert_called_once_with(
f"{os.environ['VM_API_HOST']}/log_metadata?section_id=intended_use",
data=json.dumps(
{
"content_id": "dataset_summary_text",
"text": "Generated content.",
}
),
)

def test_log_text_rejects_prompt_when_text_is_provided(self):
    """Supplying both literal text and a generation prompt is ambiguous and
    must be rejected with a ValueError."""
    kwargs = dict(
        content_id="dataset_summary_text",
        text="Hello world",
        prompt="Ignore the provided text.",
    )
    with self.assertRaisesRegex(
        ValueError, "`prompt` is only supported when `text` is omitted"
    ):
        api_client.log_text(**kwargs)

def test_log_text_rejects_invalid_context(self):
    """context['content_ids'] entries must be non-empty strings; an empty
    string should trigger a ValueError."""
    bad_context = {"content_ids": ["valid", ""]}
    with self.assertRaisesRegex(
        ValueError,
        "`context\\['content_ids'\\]` must contain only non-empty strings",
    ):
        api_client.log_text(content_id="dataset_summary_text", context=bad_context)


if __name__ == "__main__":
unittest.main()
75 changes: 74 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@

import validmind
from validmind import (
get_content_ids,
init_dataset,
init_model,
get_test_suite,
run_text_generation,
run_documentation_tests,
)
from validmind.errors import UnsupportedModelError
from validmind.vm_models.result import TextGenerationResult


@dataclass
Expand Down Expand Up @@ -115,7 +118,7 @@ def test_init_model_invalid_metadata_dict(self):
"key": "value",
"foo": "bar",
}
with self.assertRaises(UnsupportedModelError) as context:
with self.assertRaises(UnsupportedModelError):
init_model(attributes=metadata, __log=False)

def test_init_model_metadata_dict(self):
Expand Down Expand Up @@ -163,6 +166,76 @@ def test_get_default_config(self):
self.assertIn("params", config)


class TestGetContentIds(TestCase):
    """Unit tests for `get_content_ids` against the mocked documentation template."""

    @mock.patch(
        "validmind.client_config.client_config.documentation_template",
        MockedConfig.documentation_template,
    )
    def test_get_all_content_ids(self):
        # No arguments -> every content id in the template, in document order.
        expected = [
            "validmind.data_validation.ClassImbalance",
            "validmind.data_validation.DatasetSplit",
        ]
        self.assertEqual(get_content_ids(), expected)

    @mock.patch(
        "validmind.client_config.client_config.documentation_template",
        MockedConfig.documentation_template,
    )
    def test_get_content_ids_for_single_section(self):
        # A single section id may be passed as a plain string.
        expected = ["validmind.data_validation.ClassImbalance"]
        self.assertEqual(get_content_ids("test_section_1"), expected)

    @mock.patch(
        "validmind.client_config.client_config.documentation_template",
        MockedConfig.documentation_template,
    )
    def test_get_content_ids_for_multiple_sections(self):
        # A list of sections -> ids from all of them, concatenated in order.
        expected = [
            "validmind.data_validation.ClassImbalance",
            "validmind.data_validation.DatasetSplit",
        ]
        self.assertEqual(
            get_content_ids(["test_section_1", "test_section_2"]), expected
        )


class TestRunTextGeneration(TestCase):
    """Unit tests for `run_text_generation` with the generation API mocked out."""

    @mock.patch(
        "validmind.client.api_client._generate_log_text",
        return_value="<p>Generated text</p>",
    )
    def test_run_text_generation(self, mock_generate_text):
        kwargs = {
            "content_id": "dataset_summary_text",
            "prompt": "Summarize the dataset.",
            "section_id": "data_description",
            "context": {"content_ids": ["train_dataset"]},
        }
        result = run_text_generation(show=False, **kwargs)

        self.assertIsInstance(result, TextGenerationResult)
        # Every input should be echoed back on the result object unchanged.
        for attr, expected in kwargs.items():
            self.assertEqual(getattr(result, attr), expected)
        self.assertEqual(result.description, "<p>Generated text</p>")
        self.assertTrue(result._was_description_generated)
        for key in ("validmind", "timestamp", "duration_seconds"):
            self.assertIn(key, result.metadata)
        # The generation helper gets the raw inputs, section_id as a keyword.
        mock_generate_text.assert_called_once_with(
            "dataset_summary_text",
            "Summarize the dataset.",
            {"content_ids": ["train_dataset"]},
            section_id="data_description",
        )


# TODO: Fix this test
# class TestPreviewTemplate(TestCase):
# @mock.patch(
Expand Down
Loading
Loading