Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
76290d9
initial porting of previous LLG patch
mmoskal Jan 25, 2025
f19655c
update for new APIs
mmoskal Jan 25, 2025
f4dc4b8
build: integrate llguidance as an external project
mmoskal Jan 25, 2025
afb6cac
use '%llguidance' as marker to enable llg lark syntax
mmoskal Jan 26, 2025
b5399d4
add some docs
mmoskal Jan 26, 2025
adc4aed
clarify docs
mmoskal Jan 26, 2025
2a92bfb
code style fixes
mmoskal Jan 26, 2025
8cb12d4
remove llguidance.h from .gitignore
mmoskal Jan 26, 2025
de269a1
fix tests when llg is enabled
mmoskal Jan 26, 2025
a7be666
pass vocab not model to llama_sampler_init_llg()
mmoskal Jan 26, 2025
3675050
copy test-grammar-integration.cpp to test-llguidance.cpp
mmoskal Jan 26, 2025
58006dd
clang fmt
mmoskal Jan 26, 2025
036b91f
fix ref-count bug
mmoskal Jan 26, 2025
f245ca2
build and run test
mmoskal Jan 26, 2025
16a5484
gbnf -> lark syntax
mmoskal Jan 26, 2025
2937537
conditionally include llguidance test based on LLAMA_LLGUIDANCE flag
mmoskal Jan 26, 2025
c7ebf57
rename llguidance test file to test-grammar-llguidance.cpp
mmoskal Jan 26, 2025
0a211fc
add gh action for llg test
mmoskal Jan 26, 2025
8e027f8
align tests with LLG grammar syntax and JSON Schema spec
mmoskal Jan 26, 2025
ca88ce7
llama_tokenizer() in fact requires valid utf8
mmoskal Jan 26, 2025
44e1973
update llg
mmoskal Jan 26, 2025
c9e9853
format file
mmoskal Jan 26, 2025
efc36c9
add $LLGUIDANCE_LOG_LEVEL support
mmoskal Jan 26, 2025
08fefd1
fix whitespace
mmoskal Jan 26, 2025
1afc53a
fix warning
mmoskal Jan 26, 2025
00fcd98
include <cmath> for INFINITY
mmoskal Jan 26, 2025
437ff31
add final newline
mmoskal Jan 26, 2025
5475357
fail llama_sampler_init_llg() at runtime
mmoskal Jan 29, 2025
d06448a
Link gbnf_to_lark.py script; fix links; refer to llg docs for lexemes
mmoskal Jan 29, 2025
59da969
simplify #includes
mmoskal Jan 30, 2025
d59d939
improve doc string for LLAMA_LLGUIDANCE
mmoskal Jan 30, 2025
6b2de55
Merge branch 'master' into llg
mmoskal Jan 31, 2025
a049afb
typo in merge
mmoskal Jan 31, 2025
7057589
bump llguidance to 0.6.12
mmoskal Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
code style fixes
  • Loading branch information
mmoskal committed Jan 26, 2025
commit 2a92bfbe066c8c780a3930353149525ad797030a
2 changes: 1 addition & 1 deletion common/json-schema-to-grammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,7 @@ std::string json_schema_to_grammar(const json & schema) {
callbacks.resolve_refs(copy);
callbacks.add_schema("", copy);
});
#endif
#endif // LLAMA_USE_LLGUIDANCE
}

std::string build_grammar(const std::function<void(const llama_grammar_builder &)> & cb) {
Expand Down
30 changes: 15 additions & 15 deletions common/llguidance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#include "llguidance.h"

struct llama_sampler_llg {
const struct llama_model * model;
const struct llama_vocab * vocab;
const llama_model * model;
const llama_vocab * vocab;
std::string grammar_kind;
std::string grammar_data;
LlgTokenizer *tokenizer;
Expand All @@ -31,11 +31,11 @@ static LlgConstraint *llama_sampler_llg_new(LlgTokenizer *tokenizer,
return c;
}

static const char * llama_sampler_llg_name(const struct llama_sampler * /*smpl*/) {
// Name callback for the llguidance sampler (llama_sampler_i::name).
// The sampler instance is unused; the identifier is a constant.
static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) {
    static const char * const sampler_name = "llguidance";
    return sampler_name;
}

static void llama_sampler_llg_accept_impl(struct llama_sampler * smpl, llama_token token) {
static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) {
auto * ctx = (llama_sampler_llg *) smpl->ctx;
if (ctx->grammar) {
LlgCommitResult res;
Expand All @@ -44,7 +44,7 @@ static void llama_sampler_llg_accept_impl(struct llama_sampler * smpl, llama_tok
}
}

static void llama_sampler_llg_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) {
auto * ctx = (llama_sampler_llg *) smpl->ctx;
if (ctx->grammar) {
if (!ctx->has_llg_res) {
Expand Down Expand Up @@ -76,7 +76,7 @@ static void llama_sampler_llg_apply(struct llama_sampler * smpl, llama_token_dat
}
}

static void llama_sampler_llg_reset(struct llama_sampler * smpl) {
static void llama_sampler_llg_reset(llama_sampler * smpl) {
auto * ctx = (llama_sampler_llg *) smpl->ctx;
if (!ctx->grammar) {
return;
Expand All @@ -88,7 +88,7 @@ static void llama_sampler_llg_reset(struct llama_sampler * smpl) {
ctx->has_llg_res = false;
}

static struct llama_sampler * llama_sampler_llg_clone(const struct llama_sampler * smpl) {
static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
const auto * ctx = (const llama_sampler_llg *) smpl->ctx;

auto * result = llama_sampler_init_llg(ctx->model, nullptr, nullptr);
Expand All @@ -108,7 +108,7 @@ static struct llama_sampler * llama_sampler_llg_clone(const struct llama_sampler
return result;
}

static void llama_sampler_llg_free(struct llama_sampler * smpl) {
static void llama_sampler_llg_free(llama_sampler * smpl) {
const auto * ctx = (llama_sampler_llg *) smpl->ctx;

if (ctx->grammar) {
Expand All @@ -119,7 +119,7 @@ static void llama_sampler_llg_free(struct llama_sampler * smpl) {
delete ctx;
}

static struct llama_sampler_i llama_sampler_llg_i = {
static llama_sampler_i llama_sampler_llg_i = {
/* .name = */ llama_sampler_llg_name,
/* .accept = */ llama_sampler_llg_accept_impl,
/* .apply = */ llama_sampler_llg_apply,
Expand All @@ -135,24 +135,24 @@ static size_t llama_sampler_llg_tokenize_fn(const void *user_data,
uint32_t *output_tokens,
size_t output_tokens_len)
{
const struct llama_vocab *vocab = (const struct llama_vocab *)user_data;
const llama_vocab *vocab = (const llama_vocab *)user_data;
int r = llama_tokenize(vocab, (const char *) bytes, bytes_len,
(int32_t*)output_tokens, output_tokens_len, false, true);
if (r < 0)
return -r;
return r;
}

static LlgTokenizer *llama_sampler_llg_new_tokenizer(const struct llama_model * model) {
static LlgTokenizer *llama_sampler_llg_new_tokenizer(const llama_model * model) {
// TODO store the tokenizer in the model somehow
static const struct llama_model *model_cache;
static const llama_model *model_cache;
static LlgTokenizer *tokenizer_cache;

if (model_cache == model) {
return llg_clone_tokenizer(tokenizer_cache);
}

const struct llama_vocab *vocab = llama_model_get_vocab(model);
const llama_vocab *vocab = llama_model_get_vocab(model);

auto tok_eos = llama_vocab_eot(vocab);
if (tok_eos == LLAMA_TOKEN_NULL)
Expand Down Expand Up @@ -226,7 +226,7 @@ static LlgTokenizer *llama_sampler_llg_new_tokenizer(const struct llama_model *
return tokenizer;
}

struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model,
llama_sampler * llama_sampler_init_llg(const llama_model * model,
const char * grammar_kind, const char * grammar_data) {
auto * ctx = new llama_sampler_llg;

Expand Down Expand Up @@ -263,4 +263,4 @@ struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model,
};
}

#endif
#endif // LLAMA_USE_LLGUIDANCE
2 changes: 1 addition & 1 deletion common/sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
grmr = llama_sampler_init_llg(model, "lark", params.grammar.c_str());
#else
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
#endif
#endif // LLAMA_USE_LLGUIDANCE
} else {
grmr = llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
}
Expand Down
4 changes: 2 additions & 2 deletions common/sampling.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,6 @@ std::vector<enum common_sampler_type> common_sampler_types_from_names(const std:
std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);

#ifdef LLAMA_USE_LLGUIDANCE
struct llama_sampler * llama_sampler_init_llg(const struct llama_model * model,
struct llama_sampler * llama_sampler_init_llg(const llama_model * model,
const char * grammar_kind, const char * grammar_data);
#endif
#endif // LLAMA_USE_LLGUIDANCE