Skip to content

Commit

Permalink
Epoch.
Browse files Browse the repository at this point in the history
  • Loading branch information
vifino committed Jan 5, 2019
0 parents commit 2045b6f
Show file tree
Hide file tree
Showing 5 changed files with 303 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ff-notext
15 changes: 15 additions & 0 deletions COPYING
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
ISC License

Copyright (c) 2019, Adrian "vifino" Pistol

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Build configuration. Every variable below can be overridden from the
# environment or on the make command line.
CC ?= cc
CFLAGS ?= -Os
CPPFLAGS += -pedantic -Wall -Wextra #-fsanitize=address,undefined

# NOTE: DESTDIR acts as the installation prefix here; binaries are copied
# to $(DESTDIR)/bin.
DESTDIR ?= /usr/local

# Tesseract compiler and linker flags, kept separate so the libraries can be
# placed after the source file on the command line.
TESS_CFLAGS = $(shell pkg-config --cflags tesseract)
TESS_LIBS = $(shell pkg-config --libs tesseract)

BINS=ff-notext

.PHONY: all install clean

all: $(BINS)

# conversion.h is a prerequisite so that editing it triggers a rebuild; only
# the first prerequisite ($<) is handed to the compiler.
# Libraries go last: linkers resolve symbols left to right, so a library named
# before the code that uses it may be dropped (--as-needed, static archives).
ff-notext: ff-notext.c conversion.h
	$(CC) $(CPPFLAGS) $(CFLAGS) $(TESS_CFLAGS) $(LDFLAGS) -o $@ $< $(TESS_LIBS)

install: $(BINS)
	install -d $(DESTDIR)/bin
	install $(BINS) $(DESTDIR)/bin

clean:
	rm -f $(BINS) *.o
121 changes: 121 additions & 0 deletions conversion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Small library for color conversion.
// All with alpha channel.
// Uses doubles for conversion.
// Not fast at all, but simple.
#include <stdint.h>
#include <math.h>

// Byte order setup for the big-endian <-> native helpers below.
// <endian.h> is not standardized, so it is only relied upon on Linux;
// every other platform takes the painless fallback further down.
#if defined(__linux__)
#include <endian.h>
// DOCONVERT is defined only when the host is not big-endian, i.e. only when
// big-endian data differs from the native representation.
#if __BYTE_ORDER != __BIG_ENDIAN
#define DOCONVERT
#endif
#else
// Fallback: always request conversion and map the be*toh/htobe* names onto
// the ntoh*/hton* family from <arpa/inet.h>.
#define DOCONVERT
#include <arpa/inet.h>
#define be32toh ntohl
#define htobe32 htonl
#define be16toh ntohs
#define htobe16 htons
#endif

// Floating point type used for every conversion in this header.
// Define FP before including it (e.g. -DFP=float) to override the default.
#ifndef FP
#define FP double
#endif

// POW(base, exponent): power function matching FP, result converted to FP.
//
// The preprocessor cannot compare type names: inside `#if FP == float` both
// identifiers are replaced by 0, so that test is always true and selected
// powf() even when FP is double. sizeof is evaluated by the compiler proper,
// so the condition below is a compile-time constant and the branch that is
// not taken is discarded.
#define POW(base, exponent) \
	((FP) ((sizeof(FP) == sizeof(float)) \
		? (double) powf((float) (base), (float) (exponent)) \
		: pow((double) (base), (double) (exponent))))

// BE uint16_t <-> NE uint16_t
// qbeush2ush: convert the four big-endian channels at `src` to native byte
// order and store them at `dst`. Each channel is read before the channel with
// the same index is written, so `src` and `dst` may be the same array.
static inline void qbeush2ush(uint16_t* src, uint16_t* dst) {
	for (size_t chan = 0; chan < 4; chan++) {
		dst[chan] = be16toh(src[chan]);
	}
}
// qush2beush: convert the four native-endian channels at `src` to big-endian
// and store them at `dst`. Each channel is read before the channel with the
// same index is written, so `src` and `dst` may be the same array.
static inline void qush2beush(uint16_t* src, uint16_t* dst) {
	for (size_t chan = 0; chan < 4; chan++) {
		dst[chan] = htobe16(src[chan]);
	}
}

// uint16 <-> FP 0..1

// USH2FP: map a channel value 0..UINT16_MAX onto 0..1.
#define USH2FP(chan) (((FP) (chan)) / UINT16_MAX)

// Helper behind FP2USH: map 0..1 back onto 0..UINT16_MAX.
//  - Values at or below 0 (and NaN, for which every comparison is false)
//    become 0; values at or above 1 become UINT16_MAX. Converting an
//    out-of-range floating point value to an integer type is undefined.
//  - The result is rounded to nearest instead of truncated, so that
//    FP2USH(USH2FP(v)) == v even when the intermediate value lands a hair
//    below v.
static inline uint16_t fp2ush_clamp(FP chan) {
	if (!(chan > 0)) {
		return 0;
	}
	if (chan >= 1) {
		return UINT16_MAX;
	}
	return (uint16_t) (chan * UINT16_MAX + (FP) 0.5);
}

// FP2USH: the argument is evaluated exactly once.
#define FP2USH(chan) (fp2ush_clamp((FP) (chan)))

// qush2fp: convert the four uint16 channels at `src` to FP values in 0..1.
static inline void qush2fp(uint16_t* src, FP* dst) {
	for (size_t chan = 0; chan < 4; chan++) {
		dst[chan] = USH2FP(src[chan]);
	}
}

// qfp2ush: convert the four FP channels at `src` to uint16, clamping and
// rounding as described above.
static inline void qfp2ush(FP* src, uint16_t* dst) {
	for (size_t chan = 0; chan < 4; chan++) {
		dst[chan] = FP2USH(src[chan]);
	}
}

// SRGB <-> Linear RGB
// Piecewise transfer function: a linear segment (factor 12.92) up to the
// threshold and a 2.4 power curve above it. SRGB_THRES is the threshold on the
// linear side; SRGB_THRES * 12.92 is the same point on the encoded side.
//
// Every macro argument is parenthesized so that compound expressions such as
// SRGB2RGB(a + b) expand correctly. The argument is still evaluated more than
// once, so do not pass expressions with side effects.
#define SRGB_THRES ((FP) 0.0031306684425217108)
#define SRGB2RGB(s) (((s) <= (SRGB_THRES * 12.92)) ? (s) / 12.92 : POW(((s) + 0.055) / 1.055, 2.4))
#define RGB2SRGB(l) (((l) <= SRGB_THRES) ? (l) * 12.92 : 1.055 * POW((l), 1.0/2.4) - 0.055)

// srgb2rgb: decode the three colour channels of `src` into `dst`; the fourth
// (alpha) channel is copied unchanged. `src` and `dst` may be the same array.
static inline void srgb2rgb(FP* src, FP* dst) {
	for (size_t chan = 0; chan < 3; chan++) {
		dst[chan] = SRGB2RGB(src[chan]);
	}
	dst[3] = src[3]; // alpha is not transformed.
}

// rgb2srgb: encode the three colour channels of `src` into `dst`; the fourth
// (alpha) channel is copied unchanged. `src` and `dst` may be the same array.
static inline void rgb2srgb(FP* src, FP* dst) {
	for (size_t chan = 0; chan < 3; chan++) {
		dst[chan] = RGB2SRGB(src[chan]);
	}
	dst[3] = src[3]; // alpha is not transformed.
}

// Linear RGB <-> CIE XYZ

// MAT3MUL: dot product of the 3-vector `s` with the row (v1, v2, v3).
#define MAT3MUL(s, v1, v2, v3) ((s)[0] * (FP)(v1) + (s)[1] * (FP)(v2) + (s)[2] * (FP)(v3))

// MAT3M3MUL: d = scale * M * s, with the 3x3 matrix M given row by row.
//  - All three results are computed before any of them is stored, so `s` and
//    `d` may point at the same array.
//  - The do/while wrapper makes the expansion a single statement; terminate
//    the invocation with a semicolon like a function call.
#define MAT3M3MUL(s, d, scale, r1m1, r1m2, r1m3, r2m1, r2m2, r2m3, r3m1, r3m2, r3m3) \
	do { \
		const FP mat3_row0_ = MAT3MUL((s), r1m1, r1m2, r1m3) * (FP)(scale); \
		const FP mat3_row1_ = MAT3MUL((s), r2m1, r2m2, r2m3) * (FP)(scale); \
		const FP mat3_row2_ = MAT3MUL((s), r3m1, r3m2, r3m3) * (FP)(scale); \
		(d)[0] = mat3_row0_; \
		(d)[1] = mat3_row1_; \
		(d)[2] = mat3_row2_; \
	} while (0)

// rgb2xyz: multiply the three colour channels of `src` by the matrix below and
// store the result in `dst`; the fourth (alpha) channel is copied unchanged.
static inline void rgb2xyz(FP* src, FP* dst) {
	// Coefficients taken from blind.
	// Original author's note: seems to be a bit too bright?
	MAT3M3MUL(src, dst, 1,
		0.412457445582367576708548995157,
		0.357575865245515878143578447634,
		0.180437247826399665973085006954,
		0.212673370378408277403536885686,
		0.715151730491031756287156895269,
		0.072174899130559869164791564344,
		0.019333942761673460208893260415,
		0.119191955081838593666354597644,
		0.950302838552371742508739771438);

	dst[3] = src[3]; // alpha is not transformed.
}

// xyz2rgb: multiply the three colour channels of `src` by the matrix below and
// store the result in `dst`; the fourth (alpha) channel is copied unchanged.
static inline void xyz2rgb(FP* src, FP* dst) {
	// Coefficients taken from blind.
	MAT3M3MUL(src, dst, 1,
		3.240446254647737500675930277794,
		-1.537134761820080575134284117667,
		-0.498530193022728718155178739835,
		-0.969266606244679751469561779231,
		1.876011959788370209167851498933,
		0.041556042214430065351304932619,
		0.055643503564352832235773149705,
		-0.204026179735960239147729566866,
		1.057226567722703292062647051353);

	dst[3] = src[3]; // alpha is not transformed.
}
149 changes: 149 additions & 0 deletions ff-notext.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// ┳━┓┳━┓ ┏┓┓┏━┓┏┓┓┳━┓┓ ┃┏┓┓
// ┣━ ┣━ ━━┃┃┃┃ ┃ ┃ ┣━ ┏╋┛ ┃
// ┇ ┇ ┇┗┛┛━┛ ┇ ┻━┛┇ ┗ ┇
// ff-notext: censor text using tesseract
// Usage: <farbfeld source> | ff-notext | <farbfeld sink>
// made by vifino. ISC (C) vifino 2019

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>

#include <tesseract/capi.h>

#include "conversion.h"

// printf-style output to stderr, used for all diagnostics and progress
// messages in this file.
#define eprintf(...) fprintf(stderr, __VA_ARGS__)

// IO helpers.

// chew: read one item of `bytes` bytes from `file` into `buffer`.
// If fread() does not deliver the item, a message is printed to stderr and
// the process exits with status 1.
static inline void chew(FILE* file, void* buffer, size_t bytes) {
	size_t items_read = fread(buffer, bytes, 1, file);
	if (items_read == 0) {
		eprintf("wanted more bytes, didn't get any?\n");
		exit(1);
	}
}
// spew: write one item of `bytes` bytes from `buffer` to `file`.
// A NULL `file` turns the call into a no-op. If fwrite() does not write the
// item, a message is printed to stderr and the process exits with status 1.
static inline void spew(FILE* file, void* buffer, size_t bytes) {
	if (file == NULL) {
		return;
	}

	size_t items_written = fwrite(buffer, bytes, 1, file);
	if (items_written == 0) {
		eprintf("write failed.\n");
		exit(1);
	}
}

// ffparse: read and validate a 16 byte farbfeld header from `food`.
//
// The header is the 8 byte magic "farbfeld" followed by two 32 bit values,
// width then height, which are passed through ntohl(). Both header halves are
// forwarded verbatim to `out` via spew() (a NULL `out` skips the forwarding).
//
//   food  input stream positioned at the start of the image.
//   out   stream receiving a copy of the header, or NULL.
//   w, h  receive width and height in host byte order.
//
// Exits with status 1 if the magic does not match or a dimension is zero;
// chew() and spew() also exit on I/O failure.
static void ffparse(FILE* food, FILE* out, uint32_t* w, uint32_t* h) {
	unsigned char buf[8];
	uint32_t raw_w, raw_h;

	chew(food, buf, 8);
	if (memcmp(buf, "farbfeld", 8) != 0) {
		eprintf("file is not a farbfeld image?\n");
		exit(1);
	}
	spew(out, buf, 8);

	chew(food, buf, 8);
	// memcpy instead of casting the byte buffer to uint32_t*: the buffer has
	// no alignment guarantee and the cast would violate strict aliasing.
	memcpy(&raw_w, buf, sizeof raw_w);
	memcpy(&raw_h, buf + 4, sizeof raw_h);
	*w = ntohl(raw_w);
	*h = ntohl(raw_h);
	// Test the parsed values, not the (never NULL) output pointers.
	if (*w == 0 || *h == 0) {
		eprintf("image has zero dimension?\n");
		exit(1);
	}
	spew(out, buf, 8);
}

int main() {
// initialize tesseract.
TessBaseAPI* tess = TessBaseAPICreate();
if (TessBaseAPIInit3(tess, NULL, "eng") != 0) {
eprintf("[ff-notext] Error initializing Tesseract.");
return 2;
}
//TessBaseAPISetVariable(tess, "tessedit_pageseg_mode", "11");
//TessBaseAPISetPageSegMode(tess, PSM_AUTO_ONLY);
TessBaseAPISetVariable(tess, "load_system_dawg", "false");
TessBaseAPISetVariable(tess, "load_freq_dawg", "false");
eprintf("[ff-notext] Initialized Tesseract %s\n", TessVersion());

// parse input image
uint32_t w, h;
ffparse(stdin, stdout, &w, &h);

uint16_t* frame = malloc(w * h * sizeof(uint16_t) * 4);
if (!frame) return 2;

uint8_t* rgba32 = malloc(w * h * 4);
if (!rgba32) return 2;

size_t x, y;
uint16_t buf[4] = {0};
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
// nom.
chew(stdin, buf, 8);
size_t off = (x + (y * w)) * 4;
frame[off + 0] = buf[0];
frame[off + 1] = buf[1];
frame[off + 2] = buf[2];
frame[off + 3] = buf[3];

if (buf[3]) { // only if alpha is set
#ifdef DOCONVERT
qbeush2ush(buf, buf);
#endif
rgba32[off + 0] = buf[0] / 256;
rgba32[off + 1] = buf[1] / 256;
rgba32[off + 2] = buf[2] / 256;
rgba32[off + 3] = buf[3] / 256;
} else {
rgba32[off + 0] = 0;
rgba32[off + 1] = 0;
rgba32[off + 2] = 0;
rgba32[off + 3] = 0;
}
}
}

// Look for text, kill it.
eprintf("[ff-notext] Setting image..\n");
TessBaseAPISetImage(tess, rgba32, w, h, 4, 4 * w);
//TessBaseAPISetImage2(tess, image);
if (TessBaseAPIGetSourceYResolution(tess) < 70) {
TessBaseAPISetSourceResolution(tess, 70);
}

eprintf("[ff-notext] Recognizing...\n");
if (TessBaseAPIRecognize(tess, NULL) != 0) {
eprintf("[ff-notext] error in tesseract recognition\n");
return 3;
}

eprintf("[ff-notext] Iterating over results...\n");
TessResultIterator* ri = TessBaseAPIGetIterator(tess);
TessPageIteratorLevel level = RIL_WORD;
if (ri != 0) {
do {
int sx, sy;
int ex, ey;
TessPageIteratorBoundingBox(ri, level, &sx, &sy, &ex, &ey);
if (sx != 0 && sy != 0 && ex != w && ey != h)
for (y = sy; y <= ey; y++)
for (x = sx; x <= ex; x++) {
int off = (x + (y*w)) * 4;
frame[off + 0] = 0;
frame[off + 1] = 0;
frame[off + 2] = 0;
frame[off + 3] = 0;
}

char* word = TessResultIteratorGetUTF8Text(ri, level);
eprintf("[ff-notext] [(%u,%u),(%u,%u)}]: %s\n", sx, sy, ex, ey, word);
free(word);
} while (TessPageIteratorNext(ri, level));
}
free(rgba32);

// Write out.
spew(stdout, frame, w*h*sizeof(int16_t)*4);
free(frame);
return 0;
}

0 comments on commit 2045b6f

Please sign in to comment.