Use more performant fsm backend
lapp0 committed Sep 27, 2024
commit 90f7144e298af7851034ecfe0f0f15cea8ab76eb
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.10"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
5 changes: 0 additions & 5 deletions benchmarks/bench_cfg_guide.py
@@ -7,8 +7,6 @@
 from outlines.fsm.guide import CFGGuide
 from outlines.models.transformers import TransformerTokenizer
 
-from .common import ensure_numba_compiled
-
 random.seed(42)
 
 
@@ -30,9 +28,6 @@ class CFGGuideBenchmark:
 
     def setup(self, grammar_name):
         self.tokenizer = get_tiny_tokenizer()
-        ensure_numba_compiled(
-            self.tokenizer
-        )  # numba not currently used, but will be in the future
         self.prebuilt_cfg_guide = CFGGuide(
             benched_grammars[grammar_name], self.tokenizer
         )
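
Note: with the warm-up gone, setup constructs the guide directly. A minimal sketch of that construction, using only the CFGGuide(grammar, tokenizer) call visible above; the gpt2 tokenizer and the inline Lark-style grammar are hypothetical stand-ins, not the benchmark's actual get_tiny_tokenizer() or benched grammars:

from transformers import AutoTokenizer

from outlines.fsm.guide import CFGGuide
from outlines.models.transformers import TransformerTokenizer

# Hypothetical small tokenizer; the benchmark uses get_tiny_tokenizer().
tokenizer = TransformerTokenizer(AutoTokenizer.from_pretrained("gpt2"))

# Hypothetical tiny grammar; the benchmark loads larger Lark grammars.
tiny_grammar = r'start: "a"+'

# Building the guide compiles the grammar against the tokenizer's
# vocabulary -- the step previously preceded by the numba warm-up.
guide = CFGGuide(tiny_grammar, tokenizer)
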
3 changes: 1 addition & 2 deletions benchmarks/bench_json_schema.py
@@ -2,7 +2,7 @@
 from outlines.fsm.guide import RegexGuide
 from outlines.fsm.json_schema import build_regex_from_schema
 
-from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+from .common import setup_tokenizer  # noqa: E402
 
 simple_schema = """{
     "$defs": {
@@ -69,7 +69,6 @@ class JsonSchemaBenchmark:
     def setup(self, schema_name):
         self.tokenizer = setup_tokenizer()
         self.schema = schemas[schema_name]
-        ensure_numba_compiled(self.tokenizer)
 
     @cache_disabled()
     def time_json_schema_to_regex(self, schema_name):
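
For reference, the conversion time_json_schema_to_regex measures: a minimal sketch assuming build_regex_from_schema takes a JSON Schema string and returns a regex string, as its use in this file suggests; the toy schema is a hypothetical stand-in for the benchmark's schemas:

import json

from outlines.fsm.json_schema import build_regex_from_schema

# Hypothetical toy schema in the spirit of simple_schema above.
schema = json.dumps({"type": "object", "properties": {"name": {"type": "string"}}})

# Produces a regular expression matching exactly the JSON documents
# that conform to the schema.
regex = build_regex_from_schema(schema)
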
34 changes: 0 additions & 34 deletions benchmarks/bench_numba_compile.py

This file was deleted.

4 changes: 1 addition & 3 deletions benchmarks/bench_regex_guide.py
@@ -1,7 +1,7 @@
 from outlines.caching import cache_disabled
 from outlines.fsm.guide import RegexGuide
 
-from .common import ensure_numba_compiled, setup_tokenizer
+from .common import setup_tokenizer
 
 regex_samples = {
     "email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
@@ -21,7 +21,6 @@ class RegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()
@@ -34,7 +33,6 @@ class MemoryRegexGuideBenchmark:
 
     def setup(self, pattern_name):
         self.tokenizer = setup_tokenizer()
-        ensure_numba_compiled(self.tokenizer)
         self.pattern = regex_samples[pattern_name]
 
     @cache_disabled()
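
For context, what setup reduces to after this change: a minimal sketch using only calls that appear in this diff (the gpt2 wrapper from benchmarks/common.py and RegexGuide(pattern, tokenizer)); the pattern is a hypothetical example, not one of regex_samples:

from transformers import AutoTokenizer

from outlines.fsm.guide import RegexGuide
from outlines.models.transformers import TransformerTokenizer

# Same tokenizer setup as setup_tokenizer() in benchmarks/common.py.
tokenizer = TransformerTokenizer(AutoTokenizer.from_pretrained("gpt2"))

# Constructing the guide compiles the pattern into a token-level FSM index;
# no numba warm-up is needed with the new backend.
guide = RegexGuide(r"[0-9]{3}-[0-9]{2}-[0-9]{4}", tokenizer)
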
6 changes: 0 additions & 6 deletions benchmarks/common.py
@@ -1,14 +1,8 @@
 from transformers import AutoTokenizer
 
-from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
 def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
-
-
-def ensure_numba_compiled(tokenizer):
-    RegexGuide("a", tokenizer)
-    return True
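
With the warm-up helper removed, benchmarks/common.py reduces to the tokenizer setup alone; the resulting file, reconstructed from the unchanged lines above:

from transformers import AutoTokenizer

from outlines.models.transformers import TransformerTokenizer


def setup_tokenizer():
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    return TransformerTokenizer(tokenizer)
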
47 changes: 0 additions & 47 deletions outlines/fsm/fsm.py

This file was deleted.
