Merged
20 changes: 18 additions & 2 deletions README.md
@@ -23,9 +23,9 @@
## What is FLAML
FLAML is a lightweight Python library that finds accurate machine
learning models automatically, efficiently and economically. It frees users from selecting
- models and hyperparameters for each model. It can also be used to tune generic hyperparameters for large language models (LLM), MLOps/LMOps workflows, pipelines, mathematical/statistical models, algorithms, computing experiments, software configurations and so on.
+ models and hyperparameters for each model. It can also be used to tune generic hyperparameters for foundation models, MLOps/LMOps workflows, pipelines, mathematical/statistical models, algorithms, computing experiments, software configurations and so on.

- 1. For common machine learning or AI tasks like classification, regression, and generation, it quickly finds quality models for user-provided data with low computational resources. It supports both classical machine learning models and deep neural networks, including large language models such as the OpenAI GPT-3 models.
+ 1. For common machine learning or AI tasks like classification, regression, and generation, it quickly finds quality models for user-provided data with low computational resources. It supports both classical machine learning models and deep neural networks, including foundation models such as the GPT series.
1. It is easy to customize or extend. Users can find their desired customizability from a smooth range: minimal customization (computational resource budget), medium customization (e.g., scikit-style learner, search space and metric), or full customization (arbitrary training and evaluation code).
1. It supports fast automatic tuning, capable of handling complex constraints/guidance/early stopping. FLAML is powered by a new, [cost-effective
hyperparameter optimization](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function/#hyperparameter-optimization-algorithm)
@@ -95,6 +95,22 @@ estimator = LGBMRegressor()
estimator.fit(X_train, y_train)
```

* (New) You can optimize [generations](https://microsoft.github.io/FLAML/docs/Use-Cases/Auto-Generation) by ChatGPT or GPT-4 etc. with your own tuning data, success metrics and budgets.
Contributor

chatGPT and GPT-4 are not exclusive.


```python
from flaml import oai

config, analysis = oai.Completion.tune(
    data=tune_data,
    metric="success",
    mode="max",
    eval_func=success_metrics,
    inference_budget=0.05,
    optimization_budget=3,
    num_samples=-1,
)
```
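
The tuned `config` can then be applied to new instances. A minimal sketch, mirroring the `oai.Completion.create` call pattern used in the new `code_utils.py` below; the `test_instance` dict is a hypothetical example and should supply the fields referenced by the prompt template used during tuning:

```python
from flaml import oai

# Apply the best configuration found by tuning to a new instance.
response = oai.Completion.create(test_instance, **config)
print(oai.Completion.extract_text(response)[0])
```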

## Documentation

You can find detailed documentation about FLAML [here](https://microsoft.github.io/FLAML/), including the API reference, use cases, and examples.
2 changes: 1 addition & 1 deletion flaml/__init__.py
@@ -2,7 +2,7 @@
from flaml.automl import AutoML, logger_formatter
from flaml.tune.searcher import CFO, BlendSearch, FLOW2, BlendSearchTuner, RandomSearch
from flaml.onlineml.autovw import AutoVW
- from flaml.integrations import oai
+ from flaml.autogen import oai
from flaml.version import __version__


File renamed without changes.
166 changes: 166 additions & 0 deletions flaml/autogen/code_utils.py
@@ -0,0 +1,166 @@
import signal
import subprocess
import sys
from typing import List, Dict, Tuple, Optional, Union, Callable
from flaml import oai


def timeout_handler(signum, frame):
    raise TimeoutError("Timed out!")


def execute_code(code: str, max_exec_time: Optional[int] = 3):
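    """Write the code to codetest.py and execute it with the current interpreter.

    Uses a SIGALRM-based alarm (Unix-only): runs exceeding max_exec_time seconds
    return 0; otherwise returns 1 if the script exits with return code 0, else 0.
    """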
    signal.signal(signal.SIGALRM, timeout_handler)
    code = code.strip()
    with open("codetest.py", "w") as fout:
        fout.write(code)
    try:
        signal.alarm(max_exec_time)
        result = subprocess.run(
            [sys.executable, "codetest.py"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
        signal.alarm(0)
    except TimeoutError:
        return 0
    return int(result.returncode == 0)


def generate_assertions(
    definition: str, model: Optional[str] = "gpt-3.5-turbo"
) -> Tuple[str, float]:
    """Generate assertions for a function.

    Args:
        definition (str): The function definition, including the signature and docstr.
        model (str): The model used for generation.

    Returns:
        str: The generated assertions.
        float: The cost of the generation.
    """
    prompt = """Given the signature and docstring, write the exactly same number of assertion(s) for the provided example(s) in the docstring, without assertion messages.

func signature:
{definition}
assertions:"""
    response = oai.Completion.create(
        {"definition": definition},
        model=model,
        prompt=prompt,
        max_tokens=256,
        stop="\n\n",
    )
    cost = oai.Completion.cost(model, response)
    assertions = oai.Completion.extract_text(response)[0]
    return assertions, cost


def _remove_check(response):
"""Remove the check function from the response."""
# find the position of the check function
pos = response.find("def check(")
if pos == -1:
return response
return response[:pos]


def success_metrics(
    responses: List[str],
    definition: str,
    test: Optional[str] = None,
    entry_point: Optional[str] = None,
    assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = None,
) -> Dict:
    """Check if the task is successful.

    Args:
        responses (list): The list of responses.
        definition (str): The input definition.
        test (Optional, str): The test code.
        entry_point (Optional, str): The name of the function.
        assertions (Optional, str or Callable): The assertion code which serves as a filter of the responses, or an assertion generator.
            When provided, only the responses that pass the assertions will be considered for the actual test (if provided).

    Returns:
        dict: The success metrics.
    """
    n = len(responses)
    if assertions is None:
        # no assertion filter
        success_list = []
        for i in range(n):
            response = _remove_check(responses[i])
            code = (
                f"{response}\n{test}\ncheck({entry_point})"
                if response.startswith("def")
                else f"{definition}{response}\n{test}\ncheck({entry_point})"
            )
            success = execute_code(code)
            success_list.append(success)
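        # expected_success: chance that at least one of n sampled responses passes
        # the test, i.e., 1 - (1 - empirical success rate) ** n.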
        return {
            "expected_success": 1 - pow(1 - sum(success_list) / n, n),
            "success": any(s for s in success_list),
        }
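    # Assertions are provided (or generated): use them to select a response before the optional test.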
    if callable(assertions) and n > 1:
        # assertion generator
        assertions, gen_cost = assertions(definition)
    else:
        gen_cost = 0
    if n > 1 or test is None:
        for i in range(n):
            response = responses[i] = _remove_check(responses[i])
            code = (
                f"{response}\n{assertions}"
                if response.startswith("def")
                else f"{definition}{response}\n{assertions}"
            )
            succeed_assertions = execute_code(code)
            if succeed_assertions:
                break
    else:
        # just test, no need to check assertions
        succeed_assertions = False
        i, response = 0, responses[0]
    if test is None:
        # no test code
        return {
            "index_selected": i,
            "succeed_assertions": succeed_assertions,
            "gen_cost": gen_cost,
        }
    code_test = (
        f"{response}\n{test}\ncheck({entry_point})"
        if response.startswith("def")
        else f"{definition}{response}\n{test}\ncheck({entry_point})"
    )
    success = execute_code(code_test)
    return {
        "index_selected": i,
        "succeed_assertions": succeed_assertions,
        "success": success,
        "gen_cost": gen_cost,
    }


def implement(definition: str, configs: List[Dict]) -> Tuple[str, float, int]:
"""Implement a function.

Args:
definition (str): The function definition, including the signature and docstr.
configs (list): The list of configurations for completion.

Returns:
str: The implementation.
float: The cost of the implementation.
int: The index of the configuration which generates the implementation.
"""
    assertions, cost = generate_assertions(definition)
    for i, config in enumerate(configs):
        response = oai.Completion.create({"definition": definition}, **config)
        cost += oai.Completion.cost(config["model"], response)
        responses = oai.Completion.extract_text(response)
        metrics = success_metrics(responses, definition, assertions=assertions)
        if metrics["succeed_assertions"] or i == len(configs) - 1:
            return responses[metrics["index_selected"]], cost, i