Entropic 2.3.8
Local-first agentic inference engine
Loading...
Searching...
No Matches
image_preprocessor.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
15
#include <algorithm>
#include <fstream>
#include <limits>
#include <memory>
#include <stdexcept>
19
20// stb_image — use llama.cpp's vendored copy.
21// STB_IMAGE_STATIC keeps symbols internal to this TU to avoid
22// duplicate symbol conflicts with llama.cpp's own stb_image use.
23#define STB_IMAGE_IMPLEMENTATION
24#define STB_IMAGE_STATIC
25#define STBI_ONLY_PNG
26#define STBI_ONLY_JPEG
27#define STBI_ONLY_BMP
28#define STBI_ONLY_GIF
29#include <stb/stb_image.h>
30
31static auto logger = entropic::log::get("inference.image_preprocessor");
32
33namespace {
34
/**
 * @brief Bilinearly blend one channel from four neighbouring pixels.
 *
 * The buffer is indexed as interleaved 3-channel data with a row length
 * of @p w pixels: element (x, y, ch) lives at (y * w + x) * 3 + ch.
 *
 * @param data Source pixel buffer.
 * @param w    Row length of the source, in pixels.
 * @param x0   Column of the left samples.
 * @param y0   Row of the upper samples.
 * @param x1   Column of the right samples.
 * @param y1   Row of the lower samples.
 * @param fx   Horizontal weight given to the x1 column.
 * @param fy   Vertical weight given to the y1 row.
 * @param ch   Channel index within a pixel.
 * @return Interpolated channel value as a float (not clamped or rounded).
 */
float lerp_channel(const uint8_t* data, int w,
                   int x0, int y0, int x1, int y1,
                   float fx, float fy, int ch) {
    const auto row_len = static_cast<size_t>(w);
    const auto channel = static_cast<size_t>(ch);

    // Fetch a single channel sample at (col, row) as a float.
    const auto sample = [&](int col, int row) -> float {
        const size_t index =
            (static_cast<size_t>(row) * row_len + static_cast<size_t>(col)) * 3
            + channel;
        return static_cast<float>(data[index]);
    };

    // Weighted mix of two values; t is the weight of the second one.
    const auto blend = [](float first, float second, float t) -> float {
        return first * (1.0f - t) + second * t;
    };

    const float upper = blend(sample(x0, y0), sample(x1, y0), fx);
    const float lower = blend(sample(x0, y1), sample(x1, y1), fx);
    return blend(upper, lower, fy);
}
63
75void interpolate_pixel(const uint8_t* src, int src_w, int src_h,
76 uint8_t* dst,
77 float src_x, float src_y) {
78 int x0 = std::max(0, static_cast<int>(src_x));
79 int x1 = std::min(src_w - 1, x0 + 1);
80 int y0 = std::max(0, static_cast<int>(src_y));
81 int y1 = std::min(src_h - 1, y0 + 1);
82 float fx = src_x - static_cast<float>(x0);
83 float fy = src_y - static_cast<float>(y0);
84
85 for (int ch = 0; ch < 3; ++ch) {
86 float val = lerp_channel(src, src_w, x0, y0, x1, y1, fx, fy, ch);
87 dst[ch] = static_cast<uint8_t>(std::clamp(val, 0.0f, 255.0f));
88 }
89}
90
99void bilinear_resize(entropic::PreprocessedImage& img,
100 int new_w, int new_h) {
101 std::vector<uint8_t> out(
102 static_cast<size_t>(new_w) * static_cast<size_t>(new_h) * 3);
103
104 float x_ratio = static_cast<float>(img.width) / static_cast<float>(new_w);
105 float y_ratio = static_cast<float>(img.height) / static_cast<float>(new_h);
106
107 for (int y = 0; y < new_h; ++y) {
108 float sy = (static_cast<float>(y) + 0.5f) * y_ratio - 0.5f;
109 for (int x = 0; x < new_w; ++x) {
110 float sx = (static_cast<float>(x) + 0.5f) * x_ratio - 0.5f;
111 size_t dst_off = (static_cast<size_t>(y)
112 * static_cast<size_t>(new_w)
113 + static_cast<size_t>(x)) * 3;
114 interpolate_pixel(img.pixel_data.data(),
115 img.width, img.height,
116 out.data() + dst_off, sx, sy);
117 }
118 }
119
120 img.pixel_data = std::move(out);
121 img.width = new_w;
122 img.height = new_h;
123}
124
125} // anonymous namespace
126
127namespace entropic {
128
135 : config_(config) {}
136
146static std::vector<uint8_t> read_image_bytes(
147 const std::filesystem::path& path, size_t max_file_size) {
148 if (!std::filesystem::exists(path)) {
149 logger->error("Image file not found: {}", path.string());
150 throw std::runtime_error("Image file not found: " + path.string());
151 }
152 auto file_size = std::filesystem::file_size(path);
153 if (file_size > max_file_size) {
154 logger->error("Image file too large: {} bytes (max {})",
155 file_size, max_file_size);
156 throw std::runtime_error(
157 "Image exceeds max file size ("
158 + std::to_string(file_size) + " > "
159 + std::to_string(max_file_size) + ")");
160 }
161 std::ifstream file(path, std::ios::binary);
162 if (!file) {
163 throw std::runtime_error("Cannot open image file: " + path.string());
164 }
165 std::vector<uint8_t> buf(static_cast<size_t>(file_size));
166 file.read(reinterpret_cast<char*>(buf.data()),
167 static_cast<std::streamsize>(file_size));
168 return buf;
169}
170
180 const std::filesystem::path& path) {
181 auto buf = read_image_bytes(path, config_.max_file_size);
182 auto img = decode(buf.data(), buf.size(), path.string());
183 img.source_path = path.string();
184 resize_if_needed(img);
185
186 logger->info("Preprocessed image {}: {}x{} ({} bytes)",
187 path.string(), img.width, img.height,
188 img.pixel_data.size());
189 return img;
190}
191
203 const uint8_t* data,
204 size_t len,
205 const std::string& source_label) {
206 auto img = decode(data, len, source_label);
207 img.source_path = source_label;
208 resize_if_needed(img);
209
210 logger->info("Preprocessed buffer '{}': {}x{}",
211 source_label, img.width, img.height);
212 return img;
213}
214
224PreprocessedImage ImagePreprocessor::decode(
225 const uint8_t* data, size_t len,
226 const std::string& source) {
227 int w = 0;
228 int h = 0;
229 int ch = 0;
230 auto* pixels = stbi_load_from_memory(
231 data, static_cast<int>(len), &w, &h, &ch, 3);
232
233 if (pixels == nullptr) {
234 logger->error("Failed to decode image '{}': {}",
235 source, stbi_failure_reason());
236 throw std::runtime_error(
237 "Unsupported image format or decode error: " + source);
238 }
239
240 PreprocessedImage img;
241 img.width = w;
242 img.height = h;
243 img.channels = 3;
244 size_t data_size = static_cast<size_t>(w) * static_cast<size_t>(h) * 3;
245 img.pixel_data.assign(pixels, pixels + data_size);
246 stbi_image_free(pixels);
247
248 return img;
249}
250
262void ImagePreprocessor::resize_if_needed(PreprocessedImage& img) {
263 if (img.width <= config_.max_width
264 && img.height <= config_.max_height) {
265 return;
266 }
267
268 float scale_w = static_cast<float>(config_.max_width)
269 / static_cast<float>(img.width);
270 float scale_h = static_cast<float>(config_.max_height)
271 / static_cast<float>(img.height);
272
273 float scale = config_.preserve_aspect
274 ? std::min(scale_w, scale_h)
275 : 1.0f; // unused, but satisfies compiler
276
277 if (!config_.preserve_aspect) {
278 scale_w = std::min(scale_w, 1.0f);
279 scale_h = std::min(scale_h, 1.0f);
280 } else {
281 scale_w = scale;
282 scale_h = scale;
283 }
284
285 int new_w = std::max(1, static_cast<int>(
286 static_cast<float>(img.width) * scale_w));
287 int new_h = std::max(1, static_cast<int>(
288 static_cast<float>(img.height) * scale_h));
289
290 bilinear_resize(img, new_w, new_h);
291}
292
293} // namespace entropic
PreprocessedImage preprocess_file(const std::filesystem::path &path)
Preprocess an image from file path.
ImagePreprocessor(const ImagePreprocessConfig &config)
Construct preprocessor with config.
PreprocessedImage preprocess_buffer(const uint8_t *data, size_t len, const std::string &source_label)
Preprocess an image from memory buffer.
Image preprocessing for multimodal inference.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Definition logging.cpp:211
Activate model on GPU (WARM → ACTIVE).
static std::vector< uint8_t > read_image_bytes(const std::filesystem::path &path, size_t max_file_size)
Validate + read an image file fully into a byte buffer.
Image preprocessing configuration.
bool preserve_aspect
Preserve aspect ratio when resizing.
int max_width
Maximum image width (resized if exceeded)
int max_height
Maximum image height (resized if exceeded)
size_t max_file_size
Maximum file size in bytes (20MB)
Preprocessed image ready for vision encoder.
std::string source_path
Original source path (for logging)
int height
Image height in pixels.
std::vector< uint8_t > pixel_data
RGB pixel data (row-major)
int width
Image width in pixels.