23#define STB_IMAGE_IMPLEMENTATION
24#define STB_IMAGE_STATIC
29#include <stb/stb_image.h>
/**
 * @brief Bilinearly blend one colour channel from four neighbouring pixels.
 *
 * The buffer is addressed as row-major with 3 interleaved bytes per pixel:
 * the sample for (x, y) lives at `data[(y * w + x) * 3 + ch]`.
 * The two samples on row y0 are blended horizontally by @p fx, the two on
 * row y1 likewise, and the two row results are blended vertically by @p fy.
 *
 * No bounds checking is performed; the caller must pass coordinates that lie
 * inside the image and a channel index in [0, 2].
 *
 * @param data Source pixel buffer (3 bytes per pixel).
 * @param w    Width of the source image in pixels (row stride = w * 3 bytes).
 * @param x0   Column of the left samples.
 * @param y0   Row of the top samples.
 * @param x1   Column of the right samples.
 * @param y1   Row of the bottom samples.
 * @param fx   Horizontal weight of the right samples (0 = left, 1 = right).
 * @param fy   Vertical weight of the bottom samples (0 = top, 1 = bottom).
 * @param ch   Channel index within the pixel.
 * @return Interpolated channel value as a float (not rounded or clamped).
 */
float lerp_channel(const uint8_t* data, int w,
                   int x0, int y0, int x1, int y1,
                   float fx, float fy, int ch) {
    // Fetch one channel sample. The offset is computed in size_t rather than
    // int so that y * w is not evaluated in 32-bit signed arithmetic.
    auto px = [data, w, ch](int x, int y) -> float {
        return static_cast<float>(
            data[(static_cast<size_t>(y) * static_cast<size_t>(w)
                  + static_cast<size_t>(x)) * 3
                 + static_cast<size_t>(ch)]);
    };

    float top = px(x0, y0) * (1.0f - fx) + px(x1, y0) * fx;
    float bot = px(x0, y1) * (1.0f - fx) + px(x1, y1) * fx;
    return top * (1.0f - fy) + bot * fy;
}
75void interpolate_pixel(
const uint8_t* src,
int src_w,
int src_h,
77 float src_x,
float src_y) {
78 int x0 = std::max(0,
static_cast<int>(src_x));
79 int x1 = std::min(src_w - 1, x0 + 1);
80 int y0 = std::max(0,
static_cast<int>(src_y));
81 int y1 = std::min(src_h - 1, y0 + 1);
82 float fx = src_x -
static_cast<float>(x0);
83 float fy = src_y -
static_cast<float>(y0);
85 for (
int ch = 0; ch < 3; ++ch) {
86 float val = lerp_channel(src, src_w, x0, y0, x1, y1, fx, fy, ch);
87 dst[ch] =
static_cast<uint8_t
>(std::clamp(val, 0.0f, 255.0f));
// Fragment of the bilinear resize routine: the first line of its signature and
// everything after the per-pixel call are not visible in this listing.
// NOTE(review): `img` is presumably a PreprocessedImage parameter declared on
// the missing signature line — confirm.
100 int new_w,
int new_h) {
// Destination buffer: new_w * new_h pixels, 3 bytes per pixel. The size is
// computed in size_t rather than int.
101 std::vector<uint8_t> out(
102 static_cast<size_t>(new_w) *
static_cast<size_t>(new_h) * 3);
// Source pixels per destination pixel along each axis.
// NOTE(review): new_w / new_h of 0 would divide by zero here; the visible
// caller passes values forced to >= 1.
104 float x_ratio =
static_cast<float>(img.
width) /
static_cast<float>(new_w);
105 float y_ratio =
static_cast<float>(img.
height) /
static_cast<float>(new_h);
// Walk every destination pixel in row-major order.
107 for (
int y = 0; y < new_h; ++y) {
// Map the centre of destination row y (y + 0.5) into source coordinates,
// then shift back by 0.5 to get a source pixel position.
108 float sy = (
static_cast<float>(y) + 0.5f) * y_ratio - 0.5f;
109 for (
int x = 0; x < new_w; ++x) {
// Same centre-aligned mapping for the column.
110 float sx = (
static_cast<float>(x) + 0.5f) * x_ratio - 0.5f;
// Byte offset of destination pixel (x, y) inside `out`.
111 size_t dst_off = (
static_cast<size_t>(y)
112 *
static_cast<size_t>(new_w)
113 +
static_cast<size_t>(x)) * 3;
// Trailing arguments of a call whose opening is not visible here:
// destination pointer and source sample position.
// NOTE(review): presumably the tail of an interpolate_pixel(...) call — confirm.
116 out.data() + dst_off, sx, sy);
// Fragment of the routine that validates an image file and reads it into a
// byte buffer. The first line of the signature, the closing braces, and the
// tail of the function are not visible in this listing.
147 const std::filesystem::path& path,
size_t max_file_size) {
// Reject paths that do not exist: log the failure, then throw
// std::runtime_error carrying the path.
148 if (!std::filesystem::exists(path)) {
149 logger->error(
"Image file not found: {}", path.string());
150 throw std::runtime_error(
"Image file not found: " + path.string());
// Enforce the caller-supplied size cap before any buffer is allocated.
152 auto file_size = std::filesystem::file_size(path);
153 if (file_size > max_file_size) {
154 logger->error(
"Image file too large: {} bytes (max {})",
155 file_size, max_file_size);
156 throw std::runtime_error(
157 "Image exceeds max file size ("
158 + std::to_string(file_size) +
" > "
159 + std::to_string(max_file_size) +
")");
// Open the file in binary mode.
161 std::ifstream file(path, std::ios::binary);
// NOTE(review): the condition guarding this throw is not visible here;
// presumably a failed-open check on `file` — confirm.
163 throw std::runtime_error(
"Cannot open image file: " + path.string());
// Allocate a zero-initialised buffer of the size reported by the filesystem
// and read the whole file with a single read() call.
165 std::vector<uint8_t> buf(
static_cast<size_t>(file_size));
// NOTE(review): no check of the stream state or gcount() after read() is
// visible; a short read would leave trailing zero bytes in `buf` — confirm
// this is handled in the lines not shown.
166 file.read(
reinterpret_cast<char*
>(buf.data()),
167 static_cast<std::streamsize
>(file_size));
// Fragment of the file-based preprocessing entry point; the first line of the
// signature, the statement that produces `buf`, and the tail are not visible.
180 const std::filesystem::path& path) {
// Decode the raw bytes held in `buf`, using the path string as the source
// label passed to decode().
// NOTE(review): `buf` is presumably the file contents read from `path` on a
// line not shown — confirm.
182 auto img = decode(buf.data(), buf.size(), path.string());
// Hand the decoded image to resize_if_needed().
184 resize_if_needed(img);
// Log a summary of the result (the format arguments are not visible here).
186 logger->info(
"Preprocessed image {}: {}x{} ({} bytes)",
// Fragment of the in-memory preprocessing entry point; the first line(s) of
// the signature (which declare `data` and `len`) and the tail are not visible.
205 const std::string& source_label) {
// Decode the caller's buffer; source_label is passed through as the source name.
206 auto img = decode(data, len, source_label);
// Hand the decoded image to resize_if_needed().
208 resize_if_needed(img);
// Log a summary of the result (the format arguments are not visible here).
210 logger->info(
"Preprocessed buffer '{}': {}x{}",
// Fragment of the decode routine: turns an encoded image held in memory into
// a PreprocessedImage. The first line of the signature, the declarations of
// w / h / ch, and the tail of the function are not visible in this listing.
225 const uint8_t* data,
size_t len,
226 const std::string& source) {
// Decode with stb_image. The final argument (3) requests 3 output channels;
// w, h and ch receive the values reported by the decoder.
// NOTE(review): len is narrowed from size_t to int; a buffer larger than
// INT_MAX would be passed with a wrong length — confirm callers cap the size.
230 auto* pixels = stbi_load_from_memory(
231 data,
static_cast<int>(len), &w, &h, &ch, 3);
// A null result means decoding failed: log stb_image's failure reason, then
// throw std::runtime_error naming the source.
233 if (pixels ==
nullptr) {
234 logger->error(
"Failed to decode image '{}': {}",
235 source, stbi_failure_reason());
236 throw std::runtime_error(
237 "Unsupported image format or decode error: " + source);
240 PreprocessedImage img;
// Number of decoded bytes: w * h pixels, 3 bytes each, computed in size_t.
244 size_t data_size =
static_cast<size_t>(w) *
static_cast<size_t>(h) * 3;
// Copy the decoded pixels into the image's own storage, then release the
// buffer that stb_image allocated.
// NOTE(review): if assign() throws (e.g. std::bad_alloc), stbi_image_free is
// never reached and `pixels` leaks — consider an RAII guard.
245 img.pixel_data.assign(pixels, pixels + data_size);
246 stbi_image_free(pixels);
// Compute target dimensions from the configured maximum size and resize the
// image through bilinear_resize(). Several lines of this function are not
// visible in this listing, including whatever precedes the scale computation
// and the closing brace.
262void ImagePreprocessor::resize_if_needed(PreprocessedImage& img) {
// Per-axis scale factor that would make the image exactly fit the configured
// maximum width / height.
268 float scale_w =
static_cast<float>(config_.
max_width)
269 /
static_cast<float>(img.width);
270 float scale_h =
static_cast<float>(config_.
max_height)
271 /
static_cast<float>(img.height);
// One branch of a conditional expression whose condition is not visible:
// the smaller of the two factors.
// NOTE(review): presumably selected when the aspect ratio is to be
// preserved — confirm against the missing lines.
274 ? std::min(scale_w, scale_h)
// Cap both factors at 1 so that these scale factors never enlarge the image.
278 scale_w = std::min(scale_w, 1.0f);
279 scale_h = std::min(scale_h, 1.0f);
// Scaled dimensions, truncated toward zero and forced to at least 1 pixel.
285 int new_w = std::max(1,
static_cast<int>(
286 static_cast<float>(img.width) * scale_w));
287 int new_h = std::max(1,
static_cast<int>(
288 static_cast<float>(img.height) * scale_h));
290 bilinear_resize(img, new_w, new_h);
PreprocessedImage preprocess_file(const std::filesystem::path &path)
Preprocess an image from file path.
ImagePreprocessor(const ImagePreprocessConfig &config)
Construct preprocessor with config.
PreprocessedImage preprocess_buffer(const uint8_t *data, size_t len, const std::string &source_label)
Preprocess an image from memory buffer.
Image preprocessing for multimodal inference.
spdlog initialization and logger access.
ENTROPIC_EXPORT std::shared_ptr< spdlog::logger > get(const std::string &name)
Get or create a named logger.
Activate model on GPU (WARM → ACTIVE).
static std::vector< uint8_t > read_image_bytes(const std::filesystem::path &path, size_t max_file_size)
Validate + read an image file fully into a byte buffer.
Image preprocessing configuration.
bool preserve_aspect
Preserve aspect ratio when resizing.
int max_width
Maximum image width in pixels (wider images are resized).
int max_height
Maximum image height in pixels (taller images are resized).
size_t max_file_size
Maximum file size in bytes (20 MB).
Preprocessed image ready for vision encoder.
std::string source_path
Original source path (for logging)
int height
Image height in pixels.
std::vector< uint8_t > pixel_data
RGB pixel data (row-major, 3 bytes per pixel).
int width
Image width in pixels.