Created
April 29, 2026 06:07
-
-
Save ChenYFan/ee5b0441e857b09135c3b2269c88f3a6 to your computer and use it in GitHub Desktop.
Nyirusu与llama.cpp的桥接程序
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * nry_provider_bridge.c — Unified C bridge for NyirusuCore LlamaCpp provider | |
| * | |
| * Separates model/context/mmproj/sampler lifecycle for flexible JS-side management. | |
| * All four handles are independent and can be created/freed separately. | |
| * | |
| * Future HTTP API providers will not use this bridge. | |
| * For non-backpressure providers, pipeline layer simulates by: | |
| * stop current generation → modify context → new generation, UUID stays the same. | |
| */ | |
| #include "llama.h" | |
| #include "ggml.h" | |
| #include "mtmd.h" | |
| #include "mtmd-helper.h" | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <stdio.h> | |
| /* ========== Handle types ========== */ | |
/* Model handle: the loaded llama model plus values cached from it at load time. */
typedef struct {
    struct llama_model       *model;  /* released by nry_model_free() */
    const struct llama_vocab *vocab;  /* obtained from `model` in nry_model_load() */
    int32_t                   n_embd; /* llama_model_n_embd_inp(model), cached at load */
} nry_model;
| typedef struct { | |
| struct llama_context *ctx; | |
| llama_memory_t mem; | |
| nry_model *model; /* non-owning back-reference */ | |
| llama_pos n_past; | |
| } nry_context; | |
/* Sampler handle: wraps one llama sampler chain. */
typedef struct {
    struct llama_sampler *chain; /* rebuilt by nry_sampler_update(), released by nry_sampler_free() */
} nry_sampler;
| typedef struct { | |
| mtmd_context *mctx; | |
| } nry_mmproj; | |
| /* ========== Backend (call once) ========== */ | |
/* Initialize the llama backend and log the event to stderr. */
void nry_backend_init(void)
{
    llama_backend_init();
    fputs("[nry] backend initialized\n", stderr);
}
/* Release the llama backend and log the event to stderr. */
void nry_backend_free(void)
{
    llama_backend_free();
    fputs("[nry] backend freed\n", stderr);
}
| /* ========== Model ========== */ | |
| nry_model *nry_model_load(const char *path, int n_gpu_layers, int no_mmap, int vocab_only) { | |
| nry_model *m = (nry_model *)calloc(1, sizeof(nry_model)); | |
| if (!m) return NULL; | |
| struct llama_model_params mparams = llama_model_default_params(); | |
| mparams.n_gpu_layers = n_gpu_layers; | |
| mparams.use_mmap = !no_mmap; | |
| mparams.vocab_only = vocab_only ? true : false; | |
| fprintf(stderr, "[nry] loading model: %s (gpu_layers=%d, vocab_only=%d)\n", path, n_gpu_layers, vocab_only); | |
| m->model = llama_model_load_from_file(path, mparams); | |
| if (!m->model) { | |
| fprintf(stderr, "[nry] failed to load model\n"); | |
| free(m); | |
| return NULL; | |
| } | |
| m->vocab = llama_model_get_vocab(m->model); | |
| m->n_embd = llama_model_n_embd_inp(m->model); | |
| fprintf(stderr, "[nry] model loaded, n_embd=%d\n", m->n_embd); | |
| return m; | |
| } | |
| void nry_model_free(nry_model *m) { | |
| if (!m) return; | |
| if (m->model) llama_model_free(m->model); | |
| free(m); | |
| fprintf(stderr, "[nry] model freed\n"); | |
| } | |
| int32_t nry_model_n_embd(nry_model *m) { | |
| return m ? m->n_embd : 0; | |
| } | |
| int32_t nry_model_n_embd_out(nry_model *m) { | |
| return m ? llama_model_n_embd_out(m->model) : 0; | |
| } | |
| /* ========== Context ========== */ | |
| /* kv_type: 0=F16, 1=Q8_0, 2=Q4_0 */ | |
| static enum ggml_type kv_type_map(int kv_type) { | |
| switch (kv_type) { | |
| case 1: return GGML_TYPE_Q8_0; | |
| case 2: return GGML_TYPE_Q4_0; | |
| default: return GGML_TYPE_F16; | |
| } | |
| } | |
| nry_context *nry_context_create(nry_model *m, int n_ctx, int flash_attn, | |
| int kv_type, int n_batch, int n_ubatch) { | |
| if (!m || !m->model) return NULL; | |
| nry_context *c = (nry_context *)calloc(1, sizeof(nry_context)); | |
| if (!c) return NULL; | |
| struct llama_context_params cparams = llama_context_default_params(); | |
| cparams.no_perf = false; | |
| cparams.n_ctx = n_ctx; | |
| cparams.n_batch = n_batch > 0 ? n_batch : 512; | |
| cparams.n_ubatch = n_ubatch > 0 ? n_ubatch : 512; | |
| cparams.n_seq_max = 1; | |
| cparams.type_k = kv_type_map(kv_type); | |
| cparams.type_v = kv_type_map(kv_type); | |
| cparams.flash_attn_type = flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED; | |
| cparams.offload_kqv = 1; | |
| c->ctx = llama_init_from_model(m->model, cparams); | |
| if (!c->ctx) { | |
| fprintf(stderr, "[nry] failed to create context\n"); | |
| free(c); | |
| return NULL; | |
| } | |
| c->mem = llama_get_memory(c->ctx); | |
| c->model = m; | |
| c->n_past = 0; | |
| fprintf(stderr, "[nry] context created, n_ctx=%d, kv_type=%d, flash_attn=%d\n", | |
| n_ctx, kv_type, flash_attn); | |
| return c; | |
| } | |
| /* pooling_type: 0=NONE, 1=MEAN, 2=CLS, 3=LAST */ | |
| nry_context *nry_context_create_embedding(nry_model *m, int n_ctx, int flash_attn, | |
| int kv_type, int n_batch, int n_ubatch, | |
| int pooling_type) { | |
| if (!m || !m->model) return NULL; | |
| nry_context *c = (nry_context *)calloc(1, sizeof(nry_context)); | |
| if (!c) return NULL; | |
| struct llama_context_params cparams = llama_context_default_params(); | |
| cparams.n_ctx = n_ctx > 0 ? n_ctx : 512; | |
| cparams.n_batch = n_batch > 0 ? n_batch : 512; | |
| cparams.n_ubatch = n_ubatch > 0 ? n_ubatch : 512; | |
| cparams.n_seq_max = 1; | |
| cparams.type_k = kv_type_map(kv_type); | |
| cparams.type_v = kv_type_map(kv_type); | |
| cparams.flash_attn_type = flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED; | |
| cparams.offload_kqv = 1; | |
| cparams.embeddings = true; | |
| cparams.pooling_type = (enum llama_pooling_type)pooling_type; | |
| c->ctx = llama_init_from_model(m->model, cparams); | |
| if (!c->ctx) { | |
| fprintf(stderr, "[nry] failed to create embedding context\n"); | |
| free(c); | |
| return NULL; | |
| } | |
| c->mem = llama_get_memory(c->ctx); | |
| c->model = m; | |
| c->n_past = 0; | |
| fprintf(stderr, "[nry] embedding context created, n_ctx=%d, pooling_type=%d\n", | |
| cparams.n_ctx, pooling_type); | |
| return c; | |
| } | |
| void nry_set_embeddings(nry_context *c, int enabled) { | |
| if (c && c->ctx) llama_set_embeddings(c->ctx, enabled != 0); | |
| } | |
| void nry_context_clear(nry_context *c) { | |
| if (!c) return; | |
| llama_memory_clear(c->mem, true); | |
| c->n_past = 0; | |
| } | |
| void nry_context_free(nry_context *c) { | |
| if (!c) return; | |
| if (c->ctx) llama_free(c->ctx); | |
| free(c); | |
| fprintf(stderr, "[nry] context freed\n"); | |
| } | |
| int nry_context_n_past(nry_context *c) { | |
| return c ? (int)c->n_past : 0; | |
| } | |
| /* ========== Sampler ========== */ | |
| nry_sampler *nry_sampler_create(float temp, float top_p, float min_p, uint32_t seed) { | |
| nry_sampler *s = (nry_sampler *)calloc(1, sizeof(nry_sampler)); | |
| if (!s) return NULL; | |
| struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params(); | |
| s->chain = llama_sampler_chain_init(sparams); | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_penalties(64, 1.1f, 0.0f, 0.0f)); | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_temp(temp)); | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_top_p(top_p, 1)); | |
| if (min_p > 0.0f) { | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_min_p(min_p, 1)); | |
| } | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_dist(seed)); | |
| fprintf(stderr, "[nry] sampler created (temp=%.2f, top_p=%.2f, min_p=%.2f, seed=%u, repeat_penalty=1.1, last_n=64)\n", | |
| temp, top_p, min_p, seed); | |
| return s; | |
| } | |
| void nry_sampler_reset(nry_sampler *s) { | |
| if (s && s->chain) llama_sampler_reset(s->chain); | |
| } | |
| void nry_sampler_free(nry_sampler *s) { | |
| if (!s) return; | |
| if (s->chain) llama_sampler_free(s->chain); | |
| free(s); | |
| } | |
| void nry_sampler_update(nry_sampler *s, float temp, float top_p, float min_p, | |
| int32_t penalty_last_n, float penalty_repeat, | |
| float penalty_freq, float penalty_present) { | |
| if (!s) return; | |
| if (s->chain) llama_sampler_free(s->chain); | |
| struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params(); | |
| s->chain = llama_sampler_chain_init(sparams); | |
| if (penalty_last_n != 0) { | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_penalties(penalty_last_n, penalty_repeat, penalty_freq, penalty_present)); | |
| } | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_temp(temp)); | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_top_p(top_p, 1)); | |
| if (min_p > 0.0f) { | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_min_p(min_p, 1)); | |
| } | |
| llama_sampler_chain_add(s->chain, llama_sampler_init_dist(0)); | |
| } | |
| /* ========== Perf ========== */ | |
| void nry_perf_context_reset(nry_context *c) { | |
| if (c && c->ctx) llama_perf_context_reset(c->ctx); | |
| } | |
| void nry_perf_context_read(nry_context *c, | |
| double *t_p_eval_ms, double *t_eval_ms, | |
| int32_t *n_p_eval, int32_t *n_eval) { | |
| if (!c || !c->ctx) { *t_p_eval_ms = 0; *t_eval_ms = 0; *n_p_eval = 0; *n_eval = 0; return; } | |
| struct llama_perf_context_data d = llama_perf_context(c->ctx); | |
| *t_p_eval_ms = d.t_p_eval_ms; | |
| *t_eval_ms = d.t_eval_ms; | |
| *n_p_eval = d.n_p_eval; | |
| *n_eval = d.n_eval; | |
| } | |
| /* ========== Mmproj ========== */ | |
| nry_mmproj *nry_mmproj_load(const char *path, nry_model *m, int use_gpu, int flash_attn) { | |
| if (!m || !m->model) return NULL; | |
| nry_mmproj *mm = (nry_mmproj *)calloc(1, sizeof(nry_mmproj)); | |
| if (!mm) return NULL; | |
| struct mtmd_context_params mctx_params = mtmd_context_params_default(); | |
| mctx_params.use_gpu = use_gpu ? true : false; | |
| mctx_params.print_timings = true; | |
| mctx_params.flash_attn_type = flash_attn ? LLAMA_FLASH_ATTN_TYPE_ENABLED : LLAMA_FLASH_ATTN_TYPE_DISABLED; | |
| fprintf(stderr, "[nry] loading mmproj: %s\n", path); | |
| mm->mctx = mtmd_init_from_file(path, m->model, mctx_params); | |
| if (!mm->mctx) { | |
| fprintf(stderr, "[nry] failed to load mmproj\n"); | |
| free(mm); | |
| return NULL; | |
| } | |
| fprintf(stderr, "[nry] mmproj loaded\n"); | |
| return mm; | |
| } | |
| void nry_mmproj_free(nry_mmproj *mm) { | |
| if (!mm) return; | |
| if (mm->mctx) mtmd_free(mm->mctx); | |
| free(mm); | |
| fprintf(stderr, "[nry] mmproj freed\n"); | |
| } | |
| /* ========== Tokenizer (model-level) ========== */ | |
| int nry_tokenize(nry_model *m, const char *text, int32_t *out, int max_tokens, int add_special) { | |
| if (!m) return -1; | |
| return llama_tokenize(m->vocab, text, strlen(text), out, max_tokens, add_special, true); | |
| } | |
| int nry_detokenize(nry_model *m, int32_t token, char *buf, int buf_size) { | |
| if (!m) return -1; | |
| return llama_token_to_piece(m->vocab, token, buf, buf_size, 0, true); | |
| } | |
| int nry_is_eog(nry_model *m, int32_t token) { | |
| if (!m) return 1; | |
| return llama_vocab_is_eog(m->vocab, token) ? 1 : 0; | |
| } | |
| /* ========== Decode operations (context-level) ========== */ | |
| int nry_decode_tokens(nry_context *c, int32_t *tokens, int n) { | |
| if (!c || !c->ctx) return -1; | |
| struct llama_batch batch = llama_batch_get_one(tokens, n); | |
| int ret = llama_decode(c->ctx, batch); | |
| if (ret == 0) c->n_past += n; | |
| return ret; | |
| } | |
| int nry_decode_one(nry_context *c, int32_t token) { | |
| if (!c || !c->ctx) return -1; | |
| int32_t tokens[1] = { token }; | |
| struct llama_batch batch = llama_batch_get_one(tokens, 1); | |
| int ret = llama_decode(c->ctx, batch); | |
| if (ret == 0) c->n_past++; | |
| return ret; | |
| } | |
| int nry_decode_text(nry_context *c, const char *text, int add_special) { | |
| if (!c || !c->model) return -1; | |
| int32_t tokens[8192]; | |
| int n = llama_tokenize(c->model->vocab, text, strlen(text), tokens, 8192, add_special, true); | |
| if (n < 0) return -1; | |
| int batch_size = 512; | |
| for (int i = 0; i < n; i += batch_size) { | |
| int n_cur = (n - i < batch_size) ? (n - i) : batch_size; | |
| struct llama_batch batch = llama_batch_get_one(tokens + i, n_cur); | |
| int ret = llama_decode(c->ctx, batch); | |
| if (ret != 0) return ret; | |
| c->n_past += n_cur; | |
| } | |
| return 0; | |
| } | |
| /* Embedding file format: int32 n_tokens + int32 n_embd + int32 n_pos + float[n_tokens*n_embd] */ | |
| int nry_decode_embd_file(nry_context *c, const char *path) { | |
| if (!c || !c->model) return -1; | |
| FILE *f = fopen(path, "rb"); | |
| if (!f) return -1; | |
| int32_t n_tokens, n_embd, n_pos; | |
| if (fread(&n_tokens, sizeof(int32_t), 1, f) != 1 || | |
| fread(&n_embd, sizeof(int32_t), 1, f) != 1 || | |
| fread(&n_pos, sizeof(int32_t), 1, f) != 1) { | |
| fclose(f); | |
| return -2; | |
| } | |
| if (n_embd != c->model->n_embd) { | |
| fprintf(stderr, "[nry] embd dimension mismatch: file=%d, model=%d\n", n_embd, c->model->n_embd); | |
| fclose(f); | |
| return -3; | |
| } | |
| float *embd_data = (float *)malloc(n_tokens * n_embd * sizeof(float)); | |
| if (!embd_data) { fclose(f); return -4; } | |
| if (fread(embd_data, sizeof(float), n_tokens * n_embd, f) != (size_t)(n_tokens * n_embd)) { | |
| free(embd_data); | |
| fclose(f); | |
| return -5; | |
| } | |
| fclose(f); | |
| fprintf(stderr, "[nry] injecting embedding: n_tokens=%d, n_embd=%d\n", n_tokens, n_embd); | |
| int batch_size = 512; | |
| for (int i = 0; i < n_tokens; i += batch_size) { | |
| int n_cur = (n_tokens - i < batch_size) ? (n_tokens - i) : batch_size; | |
| struct llama_batch batch = { | |
| .n_tokens = n_cur, | |
| .token = NULL, | |
| .embd = embd_data + i * n_embd, | |
| .pos = NULL, | |
| .n_seq_id = NULL, | |
| .seq_id = NULL, | |
| .logits = NULL, | |
| }; | |
| int ret = llama_decode(c->ctx, batch); | |
| if (ret != 0) { | |
| fprintf(stderr, "[nry] embd decode failed at offset %d\n", i); | |
| free(embd_data); | |
| return -6; | |
| } | |
| c->n_past += n_cur; | |
| } | |
| free(embd_data); | |
| fprintf(stderr, "[nry] embedding injected, n_past=%d\n", (int)c->n_past); | |
| return 0; | |
| } | |
| /* ========== Sampling ========== */ | |
| int32_t nry_sample(nry_context *c, nry_sampler *s) { | |
| if (!c || !s) return -1; | |
| return llama_sampler_sample(s->chain, c->ctx, -1); | |
| } | |
| /* ========== Text embedding extraction ========== */ | |
| int nry_text_embed(nry_model *m, nry_context *c, const char *text, | |
| float *out, int max_embd) { | |
| if (!m || !c || !c->ctx) return -1; | |
| int32_t tokens[8192]; | |
| int n = llama_tokenize(m->vocab, text, strlen(text), tokens, 8192, 1, 1); | |
| if (n <= 0) return -2; | |
| llama_memory_clear(c->mem, true); | |
| c->n_past = 0; | |
| struct llama_batch batch = llama_batch_init(n, 0, 1); | |
| for (int i = 0; i < n; i++) { | |
| batch.token[i] = tokens[i]; | |
| batch.pos[i] = i; | |
| batch.n_seq_id[i] = 1; | |
| batch.seq_id[i][0] = 0; | |
| batch.logits[i] = 1; | |
| } | |
| batch.n_tokens = n; | |
| int ret = llama_decode(c->ctx, batch); | |
| if (ret != 0) { | |
| fprintf(stderr, "[nry] text embed decode failed: %d\n", ret); | |
| llama_batch_free(batch); | |
| return -3; | |
| } | |
| const float *embd = llama_get_embeddings_seq(c->ctx, 0); | |
| if (!embd) { | |
| embd = llama_get_embeddings_ith(c->ctx, -1); | |
| } | |
| if (!embd) { | |
| fprintf(stderr, "[nry] text embed: no embeddings available\n"); | |
| llama_batch_free(batch); | |
| return -4; | |
| } | |
| int n_embd_out = llama_model_n_embd_out(m->model); | |
| int n_copy = (n_embd_out < max_embd) ? n_embd_out : max_embd; | |
| memcpy(out, embd, n_copy * sizeof(float)); | |
| llama_batch_free(batch); | |
| c->n_past = n; | |
| fprintf(stderr, "[nry] text embedding extracted, n_tokens=%d, n_embd_out=%d\n", | |
| n, n_embd_out); | |
| return n_embd_out; | |
| } | |
| /* ========== Embedding extraction (mmproj) ========== */ | |
| int nry_extract_image(nry_model *m, nry_mmproj *mm, const char *image_path, const char *output_path) { | |
| if (!m || !mm || !mm->mctx) return -1; | |
| mtmd_bitmap *bitmap = mtmd_helper_bitmap_init_from_file(mm->mctx, image_path); | |
| if (!bitmap) { | |
| fprintf(stderr, "[nry] failed to load image: %s\n", image_path); | |
| return -1; | |
| } | |
| const char *marker = mtmd_default_marker(); | |
| char prompt[512]; | |
| snprintf(prompt, sizeof(prompt), "%s", marker); | |
| mtmd_input_chunks *chunks = mtmd_input_chunks_init(); | |
| mtmd_input_text text = { prompt, false, true }; | |
| const mtmd_bitmap *bitmaps[] = { bitmap }; | |
| int32_t ret = mtmd_tokenize(mm->mctx, chunks, &text, bitmaps, 1); | |
| if (ret != 0) { | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return -2; | |
| } | |
| /* find image chunk */ | |
| const mtmd_input_chunk *img_chunk = NULL; | |
| size_t n_chunks = mtmd_input_chunks_size(chunks); | |
| for (size_t i = 0; i < n_chunks; i++) { | |
| const mtmd_input_chunk *ch = mtmd_input_chunks_get(chunks, i); | |
| if (mtmd_input_chunk_get_type(ch) == MTMD_INPUT_CHUNK_TYPE_IMAGE) { | |
| img_chunk = ch; | |
| break; | |
| } | |
| } | |
| if (!img_chunk) { | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return -3; | |
| } | |
| ret = mtmd_encode_chunk(mm->mctx, img_chunk); | |
| if (ret != 0) { | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return -4; | |
| } | |
| int32_t n_tokens = (int32_t)mtmd_input_chunk_get_n_tokens(img_chunk); | |
| int32_t n_embd = llama_model_n_embd_inp(m->model); | |
| int32_t n_pos = (int32_t)mtmd_input_chunk_get_n_pos(img_chunk); | |
| float *embd = mtmd_get_output_embd(mm->mctx); | |
| fprintf(stderr, "[nry] extracted: n_tokens=%d, n_embd=%d, n_pos=%d\n", n_tokens, n_embd, n_pos); | |
| FILE *f = fopen(output_path, "wb"); | |
| if (!f) { | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return -5; | |
| } | |
| fwrite(&n_tokens, sizeof(int32_t), 1, f); | |
| fwrite(&n_embd, sizeof(int32_t), 1, f); | |
| fwrite(&n_pos, sizeof(int32_t), 1, f); | |
| fwrite(embd, sizeof(float), n_tokens * n_embd, f); | |
| fclose(f); | |
| fprintf(stderr, "[nry] saved embedding to %s (%d bytes)\n", | |
| output_path, (int)(12 + n_tokens * n_embd * sizeof(float))); | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return 0; | |
| } | |
| /* ========== Context state save/load ========== */ | |
| size_t nry_context_state_size(nry_context *c) { | |
| if (!c || !c->ctx) return 0; | |
| return llama_state_seq_get_size(c->ctx, 0); | |
| } | |
| size_t nry_context_state_save(nry_context *c, uint8_t *buf, size_t buf_size) { | |
| if (!c || !c->ctx) return 0; | |
| return llama_state_seq_get_data(c->ctx, buf, buf_size, 0); | |
| } | |
| size_t nry_context_state_load(nry_context *c, const uint8_t *buf, size_t buf_size) { | |
| if (!c || !c->ctx) return 0; | |
| llama_memory_clear(c->mem, true); | |
| size_t read = llama_state_seq_set_data(c->ctx, buf, buf_size, 0); | |
| /* n_past is not tracked by llama — caller must set it */ | |
| return read; | |
| } | |
| void nry_context_set_n_past(nry_context *c, int n_past) { | |
| if (c) c->n_past = n_past; | |
| } | |
| /* ========== Vision eval (context + mmproj) ========== */ | |
| int nry_vision_eval(nry_context *c, nry_mmproj *mm, const char *prompt, const char *image_path) { | |
| if (!c || !mm || !mm->mctx) return -1; | |
| mtmd_bitmap *bitmap = mtmd_helper_bitmap_init_from_file(mm->mctx, image_path); | |
| if (!bitmap) { | |
| fprintf(stderr, "[nry] failed to load image: %s\n", image_path); | |
| return -1; | |
| } | |
| mtmd_input_chunks *chunks = mtmd_input_chunks_init(); | |
| mtmd_input_text text = { prompt, true, true }; | |
| const mtmd_bitmap *bitmaps[] = { bitmap }; | |
| int32_t ret = mtmd_tokenize(mm->mctx, chunks, &text, bitmaps, 1); | |
| if (ret != 0) { | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| return -2; | |
| } | |
| llama_pos new_n_past = 0; | |
| ret = mtmd_helper_eval_chunks(mm->mctx, c->ctx, chunks, c->n_past, 0, 512, true, &new_n_past); | |
| c->n_past = new_n_past; | |
| mtmd_bitmap_free(bitmap); | |
| mtmd_input_chunks_free(chunks); | |
| if (ret != 0) { | |
| fprintf(stderr, "[nry] vision eval failed: %d\n", ret); | |
| return -3; | |
| } | |
| fprintf(stderr, "[nry] vision eval done, n_past=%d\n", (int)c->n_past); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment