问题
I am trying to develop an algorithm that converts simple mono-line images, e.g. a maze, to a 2D text array.
For example, the image below, it would be converted to the following text array.
[|------------ |]
[| | |]
[| |]
[| |------| ---- |]
[| | | |]
[| | --- |]
[|--- | | |]
[| |--- | |]
[| | | |]
[| --------------- |]
[| |]
[| -------------------|]
and finally, like this, where 0=obstacle and 1=free passage
[0000000000000111111110]
[0111110111111111111110]
[0111111111111111111110]
[0110000000011111100000]
[0111111111011111011110]
[0111111111011111000110]
[0000111111011111111010]
[0111111111000011111010]
[0111110111111011111110]
[0111100000000000000010]
[0111111111111111111110]
[0110000000000000000000]
I am thinking to use an Image to Line Art Text like algorithms, ie https://www.text-image.com/convert/pic2ascii.cgi
What do you think about this approach?
回答1:
Interesting problem; it is basically a vector form of image-to-ASCII-art conversion... I managed to do this with this algorithm:
preprocess image
You gave us a JPG, which uses lossy compression, meaning your image contains many more than just 2 colors. So there are shades and artifacts which will screw things up. First we must get rid of those by thresholding and recoloring, so that we have a 2D BW image (no grayscale).
vectorize
Your maze is axis aligned, so it contains only horizontal and vertical (`h`,`v`) lines. So simply scan each row of the image: find the first wall pixel, then the pixel where that wall ends, and store the segment somewhere... repeat until the whole row is processed, and do this for all rows. Then do the same for the columns of the image. As your image has thick walls, ignore lines shorter than the thickness threshold and remove adjacent (duplicate) lines that are (almost) the same.
get list of possible grid coordinates from h,v lines
simply make a list of all x and y (separately) coordinates from lines start and end points. Then sort them and remove too close coordinates (duplicates).
Now the min and max values give you the AABB of your maze, and the GCD of all the `coordinate - lowest coordinate` differences will give you the grid size.
align h,v lines to grid
simply round all start/end points to nearest grid position ...
create text buffer for maze
The AABB along with the grid size will give you the resolution of your maze in cells, so simply create a 2D text buffer where each cell has `NxN` characters. I am using `6x3` cells, which look nice enough (square and with enough space inside).
render h,v lines into text
simply loop through all lines and render `-` or `|` instead of pixels... I am also using `+` if the target position does not contain ' '.
convert 2D text array into wanted text output
simply copy the lines into a single text ... or, if you are clever enough, you can have the 1D and 2D text at the same memory place with `eol` encoded between lines.
Here is a simple example in C++/VCL that I made from the example in the link above:
//---------------------------------------------------------------------------
#include <vcl.h>
#include <jpeg.hpp>
#pragma hdrstop
#include "win_main.h"
#include "List.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
// Main form instance (presumably assigned by the VCL application startup code - confirm in the project source).
TForm1 *Form1;
// Shared working bitmap: filled by load(), converted and overdrawn by bmp2lintxt(), freed in TForm1::FormDestroy().
Graphics::TBitmap *bmp=new Graphics::TBitmap;
// txt_xs,txt_ys: size of the last generated text in characters (written by bmp2lintxt());
// txt_xf: absolute font size of the memo (written by update()), used there as the character width estimate.
int txt_xs=0,txt_ys=0,txt_xf=0;
//---------------------------------------------------------------------------
// Sorts a[0..n-1] into ascending order in place using bubble sort.
// a : array to sort; a null pointer is accepted and ignored
// n : number of items in a; n<2 means there is nothing to do
// Only elements inside [0,n) are ever read or written.
template <class T> void sort_asc_bubble(T *a,int n)
{
    if ((!a)||(n<2)) return;                // empty or single item array is already sorted
    for (bool swapped=true;swapped;n--)     // every pass moves the largest item to a[n-1], so the next pass is shorter
    {
        swapped=false;
        for (int i=1;i<n;i++)               // process the still unsorted part of the array
            if (a[i-1]>a[i])                // neighbours in wrong order -> swap them
            {
                T t=a[i-1]; a[i-1]=a[i]; a[i]=t;
                swapped=true;               // something moved, so one more pass is needed
            }
    }
}
//---------------------------------------------------------------------------
// Converts an axis aligned line drawing (maze) into ASCII art.
//
// Steps: threshold the bitmap to pure black/white, collect horizontal and
// vertical wall segments, estimate the grid cell size from the segment end
// points, snap the segments to that grid and render them as '-', '|' and '+'
// into a character buffer that holds cx x cy characters per grid cell.
//
// bmp     : picture to convert. It is modified in place: switched to a 32 bit
//           DIB, thresholded, and overdrawn with the detected grid and lines.
// returns : the ASCII art with rows separated by "\r\n"; an empty string when
//           bmp is null, has no pixels, or no wall segment was found.
// Side effect: stores the text size (in characters) in the globals txt_xs,txt_ys.
AnsiString bmp2lintxt(Graphics::TBitmap *bmp)
{
    const bool debug=false;         // true: also save the intermediate bitmaps to disk
    const int cx=6;                 // cell size
    const int cy=3;
    const int thr_bw=400;           // BW threshold
    const int thr_thickness=10;     // wall thikness threshold
    AnsiString txt="",eol="\r\n";
    int x,y,x0,y0,x1,y1,xs,ys,gx,gy,nx,ny,i,i0,i1,j;
    union { BYTE db[4]; DWORD dd; } c; DWORD **pyx;
    List<int> h,v;  // horizontal and vertical lines (x,y,size)
    List<int> tx,ty;// temp lists for grid GCD computation
    // [init stuff]
    if (!bmp) return txt;
    bmp->HandleType=bmDIB;
    bmp->PixelFormat=pf32bit;
    xs=bmp->Width ;
    ys=bmp->Height;
    if (xs<=0) return txt;
    if (ys<=0) return txt;
    pyx=new DWORD*[ys];             // row pointers for direct pixel access pyx[y][x]
    for (y=0;y<ys;y++) pyx[y]=(DWORD*)bmp->ScanLine[y];
    // threshold bmp to B&W
    for (y=0;y<ys;y++)
     for (x=0;x<xs;x++)
        {
        c.dd=pyx[y][x];
        i =c.db[0];                 // sum of the 3 low bytes of the pixel
        i+=c.db[1];
        i+=c.db[2];
        if (i>=thr_bw) c.dd=0x00FFFFFF;
        else           c.dd=0x00000000;
        pyx[y][x]=c.dd;             // from now on: 0 = wall, nonzero = free space
        }
    if (debug) bmp->SaveToFile("out0_bw.bmp");
    // [vectorize]
    // get horizontal lines
    i0=0; i1=0; h.num=0;            // <i0,i1) = lines found in the previous row
    for (y0=0;y0<ys;y0++)
        {
        for (x0=0;x0<xs;)
            {
            for (     ;x0<xs;x0++) if (!pyx[y0][x0]) break;         // first wall pixel
            for (x1=x0;x1<xs;x1++) if ( pyx[y0][x1]){ x1--; break; }// last wall pixel
            i=x1-x0;
            if (i>thr_thickness)    // shorter runs are just cross sections of vertical walls
                {
                h.add(x0);
                h.add(y0);
                h.add(i);
                }
            x0=x1+1;
            }
        // remove duplicate lines (thick wall = almost the same line repeated in consecutive rows)
        for (i=i0;i<i1;i+=3)
         for (j=i1;j<h.num;j+=3)
          if ((abs(h[i+0]-h[j+0])<thr_thickness)&&(abs(h[i+2]-h[j+2])<thr_thickness))
            {
            h.del(i);               // drops the 3 values (x,y,size) of line i
            h.del(i);
            h.del(i);
            i1-=3; i-=3; break;
            }
        i0=i1; i1=h.num;
        }
    // get vertical lines
    i0=0; i1=0; v.num=0;            // <i0,i1) = lines found in the previous column
    for (x0=0;x0<xs;x0++)
        {
        for (y0=0;y0<ys;)
            {
            for (     ;y0<ys;y0++) if (!pyx[y0][x0]) break;         // first wall pixel
            for (y1=y0;y1<ys;y1++) if ( pyx[y1][x0]){ y1--; break; }// last wall pixel
            i=y1-y0;
            if (i>thr_thickness)    // shorter runs are just cross sections of horizontal walls
                {
                v.add(x0);
                v.add(y0);
                v.add(i);
                }
            y0=y1+1;
            }
        // remove duplicate lines (thick wall = almost the same line repeated in consecutive columns)
        for (i=i0;i<i1;i+=3)
         for (j=i1;j<v.num;j+=3)
          if ((abs(v[i+1]-v[j+1])<thr_thickness)&&(abs(v[i+2]-v[j+2])<thr_thickness))
            {
            v.del(i);               // drops the 3 values (x,y,size) of line i
            v.del(i);
            v.del(i);
            i1-=3; i-=3; break;
            }
        i0=i1; i1=v.num;
        }
    // nothing detected: there is no AABB nor grid to compute (would divide by zero below)
    if ((h.num<=0)&&(v.num<=0))
        {
        txt_xs=0;
        txt_ys=0;
        delete[] pyx;
        return txt;
        }
    // [compute grid]
    x0=xs; y0=ys; x1=0; y1=0;       // AABB
    gx=10; gy=10;                   // grid cell size
    nx=0; ny=0;                     // grid cells
    tx.num=0; ty.num=0;             // clear possible x,y coordinates
    for (i=0;i<h.num;i+=3)
        {
        // each line below: grow the AABB and add the coordinate to tx/ty if it is not there yet
        x =h[i+0];
        y =h[i+1];
        if (x0>x) x0=x; if (x1<x) x1=x; for (j=0;j<tx.num;j++) if (tx[j]==x){ j=-1; break; } if (j>=0) tx.add(x);
        if (y0>y) y0=y; if (y1<y) y1=y; for (j=0;j<ty.num;j++) if (ty[j]==y){ j=-1; break; } if (j>=0) ty.add(y);
        x+=h[i+2];
        if (x0>x) x0=x; if (x1<x) x1=x; for (j=0;j<tx.num;j++) if (tx[j]==x){ j=-1; break; } if (j>=0) tx.add(x);
        }
    for (i=0;i<v.num;i+=3)
        {
        x =v[i+0];
        y =v[i+1];
        if (x0>x) x0=x; if (x1<x) x1=x; for (j=0;j<tx.num;j++) if (tx[j]==x){ j=-1; break; } if (j>=0) tx.add(x);
        if (y0>y) y0=y; if (y1<y) y1=y; for (j=0;j<ty.num;j++) if (ty[j]==y){ j=-1; break; } if (j>=0) ty.add(y);
        y+=v[i+2];
        if (y0>y) y0=y; if (y1<y) y1=y; for (j=0;j<ty.num;j++) if (ty[j]==y){ j=-1; break; } if (j>=0) ty.add(y);
        }
    // order tx,ty (lists with less than 2 items need no sorting)
    if (tx.num>1) sort_asc_bubble(tx.dat,tx.num);
    if (ty.num>1) sort_asc_bubble(ty.dat,ty.num);
    // remove too close coordinates
    for (i=1;i<tx.num;i++) if (tx[i]-tx[i-1]<=thr_thickness){ tx.del(i); i--; }
    for (i=1;i<ty.num;i++) if (ty[i]-ty[i-1]<=thr_thickness){ ty.del(i); i--; }
    // estimate gx,gy as the smallest coordinate step, then stretch it so that a whole
    // number of cells fits the AABB; the clamps keep every divisor at least 1
    for (gx=x1-x0,i=1;i<tx.num;i++){ x=tx[i]-tx[i-1]; if (gx>x) gx=x; }
    if (gx<1) gx=1;
    nx=(x1-x0+1)/gx; if (nx<1) nx=1;
    gx=(x1-x0+1)/nx; if (gx<1) gx=1;
    x1=x0+nx*gx;
    for (gy=y1-y0,i=1;i<ty.num;i++){ y=ty[i]-ty[i-1]; if (gy>y) gy=y; }
    if (gy<1) gy=1;
    ny=(y1-y0+1)/gy; if (ny<1) ny=1;
    gy=(y1-y0+1)/ny; if (gy<1) gy=1;
    y1=y0+ny*gy;
    // align x,y to grid: multiplicate nx,ny by cx,cy to form boxes and enlarge by 1 for final border lines
    nx=(cx*nx)+1;
    ny=(cy*ny)+1;
    // align h,v lines to grid (round start point and size to the nearest grid multiple)
    for (i=0;i<h.num;i+=3)
        {
        x=h[i+0]-x0; x=((x+(gx>>1))/gx)*gx; h[i+0]=x+x0;
        y=h[i+1]-y0; y=((y+(gy>>1))/gy)*gy; h[i+1]=y+y0;
        j=h[i+2];    j=((j+(gx>>1))/gx)*gx; h[i+2]=j;
        }
    for (i=0;i<v.num;i+=3)
        {
        x=v[i+0]-x0; x=((x+(gx>>1))/gx)*gx; v[i+0]=x+x0;
        y=v[i+1]-y0; y=((y+(gy>>1))/gy)*gy; v[i+1]=y+y0;
        j=v[i+2];    j=((j+(gy>>1))/gy)*gy; v[i+2]=j;
        }
    // [h,v lines -> ASCII Art]
    char *text=new char[nx*ny];     // nx*ny characters, row after row
    char **tyx=new char*[ny];       // row pointers so the buffer can be used as tyx[y][x]
    for (y=0;y<ny;y++)
     for (tyx[y]=text+(nx*y),x=0;x<nx;x++)
      tyx[y][x]=' ';
    // h lines
    for (i=0;i<h.num;i+=3)
        {
        x=(h[i+0]-x0)/gx;
        y=(h[i+1]-y0)/gy;
        j=(h[i+2]   )/gx; j+=x;
        x*=cx; y*=cy; j*=cx;
        // start and size are rounded separately, so the end can land one cell
        // past the border: clamp to the text buffer instead of writing outside of it
        if (x<0) x=0; if (j>nx-1) j=nx-1;
        if (y<0) y=0; if (y>ny-1) y=ny-1;
        for (;x<=j;x++) tyx[y][x]='-';
        }
    // v lines
    for (i=0;i<v.num;i+=3)
        {
        x=(v[i+0]-x0)/gx;
        y=(v[i+1]-y0)/gy;
        j=(v[i+2]   )/gy; j+=y;
        x*=cx; y*=cy; j*=cy;
        // same clamping as for the h lines
        if (x<0) x=0; if (x>nx-1) x=nx-1;
        if (y<0) y=0; if (j>ny-1) j=ny-1;
        for (;y<=j;y++)
         if (tyx[y][x]=='-') tyx[y][x]='+';     // crossing with a horizontal line
         else                tyx[y][x]='|';
        }
    // convert char[ny][nx] to AnsiString
    for (txt="",y=0;y<ny;y++,txt+=eol)
     for (x=0;x<nx;x++) txt+=tyx[y][x];
    txt_xs=nx;  // just remember the text size for window resize
    txt_ys=ny;
    delete[] text;
    delete[] tyx;
    // [debug draw]
    // grid
    bmp->Canvas->Pen->Color=TColor(0x000000FF);
    for (i=1,x=x0;i;x+=gx)
        {
        if (x>=x1){ x=x1; i=0; }    // last grid line is drawn exactly at the AABB edge
        bmp->Canvas->MoveTo(x,y0);
        bmp->Canvas->LineTo(x,y1);
        }
    for (i=1,y=y0;i;y+=gy)
        {
        if (y>=y1){ y=y1; i=0; }
        bmp->Canvas->MoveTo(x0,y);
        bmp->Canvas->LineTo(x1,y);
        }
    if (debug) bmp->SaveToFile("out1_grid.bmp");
    // h,v lines
    bmp->Canvas->Pen->Color=TColor(0x00FF0000);
    bmp->Canvas->Pen->Width=2;
    for (i=0;i<h.num;)
        {
        x=h[i]; i++;
        y=h[i]; i++;
        j=h[i]; i++;
        bmp->Canvas->MoveTo(x,y);
        bmp->Canvas->LineTo(x+j,y);
        }
    for (i=0;i<v.num;)
        {
        x=v[i]; i++;
        y=v[i]; i++;
        j=v[i]; i++;
        bmp->Canvas->MoveTo(x,y);
        bmp->Canvas->LineTo(x,y+j);
        }
    bmp->Canvas->Pen->Width=1;
    if (debug) bmp->SaveToFile("out2_maze.bmp");
    delete[] pyx;
    return txt;
}
//---------------------------------------------------------------------------
void update()
{
int x0,x1,y0,y1,i,l;
x0=bmp->Width;
y0=bmp->Height;
// Font size
Form1->mm_txt->Font->Size=Form1->cb_font->ItemIndex+4;
txt_xf=abs(Form1->mm_txt->Font->Size);
// mode
Form1->mm_txt->Text=bmp2lintxt(bmp);
// output
Form1->mm_txt->Lines->SaveToFile("pic.txt");
x1=txt_xs*txt_xf;
y1=txt_ys*abs(Form1->mm_txt->Font->Height);
if (y0<y1) y0=y1;
x0+=x1+16+Form1->flb_pic->Width;
y0+=Form1->pan_top->Height;
if (x0<340) x0=340;
if (y0<128) y0=128;
Form1->ClientWidth=x0;
Form1->ClientHeight=y0;
Form1->Caption=AnsiString().sprintf("Picture -> Text ( Font %ix%i )",abs(Form1->mm_txt->Font->Size),abs(Form1->mm_txt->Font->Height));
}
//---------------------------------------------------------------------------
// Paints the global working bitmap at the top left corner (0,0) of the ptb_gfx canvas of the main form.
void draw()
{
Form1->ptb_gfx->Canvas->Draw(0,0,bmp);
}
//---------------------------------------------------------------------------
// Loads the picture file into the global working bitmap and sizes the form for it.
// name : file to load; an empty name is ignored. The format is selected by the
//        (case insensitive) extension: ".bmp", ".jpg" or ".jpeg". Any other
//        extension leaves the previous bitmap content in place.
// Exceptions thrown by the VCL loaders are passed on to the caller.
void load(AnsiString name)
{
    if (name=="") return;
    AnsiString ext=ExtractFileExt(name).LowerCase();
    if (ext==".bmp")
    {
        bmp->LoadFromFile(name);
    }
    if ((ext==".jpg")||(ext==".jpeg"))
    {
        TJPEGImage *jpg=new TJPEGImage;
        try
        {
            jpg->LoadFromFile(name);
            bmp->Assign(jpg);       // decode the JPEG into the bitmap
        }
        catch (...)
        {
            delete jpg;             // do not leak the decoder when the file is missing or corrupted
            throw;
        }
        delete jpg;
    }
    // bmp2lintxt() accesses the pixels directly as 32 bit values
    bmp->HandleType=bmDIB;
    bmp->PixelFormat=pf32bit;
    // picture box as wide as the picture; the form gets room for about two picture widths
    Form1->ptb_gfx->Width=bmp->Width;
    Form1->ClientHeight=bmp->Height;
    Form1->ClientWidth=(bmp->Width<<1)+32;
}
//---------------------------------------------------------------------------
// Form constructor: passes Owner on to the TForm base class and does no setup of its own.
__fastcall TForm1::TForm1(TComponent* Owner):TForm(Owner)
{
}
//---------------------------------------------------------------------------
// Destroy handler of the form (presumably wired to OnDestroy in the .dfm - confirm): frees the global working bitmap.
// NOTE(review): the global pointer bmp is not reset, so it must not be used after this call.
void __fastcall TForm1::FormDestroy(TObject *Sender)
{
delete bmp;
}
//---------------------------------------------------------------------------
// Paint handler of the form (presumably wired to OnPaint in the .dfm - confirm): redraws the picture via draw().
void __fastcall TForm1::FormPaint(TObject *Sender)
{
draw();
}
//---------------------------------------------------------------------------
// Change handler of the flb_pic file list: loads the selected file and regenerates the text.
void __fastcall TForm1::flb_picChange(TObject *Sender)
{
load(flb_pic->FileName);    // fills the global bitmap (an empty file name is ignored by load())
update();                   // converts the bitmap to text and resizes the form
}
//---------------------------------------------------------------------------
// Activation handler of the form: focuses the file list and, when nothing is
// selected yet and the list is not empty, selects and processes its first file.
void __fastcall TForm1::FormActivate(TObject *Sender)
{
    flb_pic->SetFocus();
    flb_pic->Update();
    if (flb_pic->ItemIndex!=-1) return;     // something is already selected
    if (flb_pic->Items->Count<=0) return;   // no file to select
    flb_pic->ItemIndex=0;
    flb_picChange(this);
}
//---------------------------------------------------------------------------
Just ignore the VCL stuff and convert the resulting text into whatever you have at your disposal. I also use my own dynamic list template, so:
- `List<double> xxx;` is the same as `double xxx[];`
- `xxx.add(5);` adds `5` to the end of the list
- `xxx[7]` accesses an array element (safe)
- `xxx.dat[7]` accesses an array element (unsafe but fast direct access)
- `xxx.num` is the actual used size of the array
- `xxx.reset()` clears the array and sets `xxx.num=0`
- `xxx.allocate(100)` preallocates space for `100` items

So use whatever list you have, recode it, or use `std::vector` instead...
I edited out the texts from your image:
And this is the result using that as input:
+-----------+------------------ |
| | |
| | |
| | | | +-----------+
| | | | |
| | | | |
| +-----------+ +-----+ |
| | | |
| | | |
+------ | +-----+ | |
| | | |
| | | |
| ------+-----------+------ |
| |
| |
| ------------------------------+
And here the saved debug bitmaps (from left to right: BW,Grid,Maze):
The only important stuff from the code is function:
AnsiString bmp2lintxt(Graphics::TBitmap *bmp);
Which returns text from VCL (GDI based) bitmap.
来源:https://stackoverflow.com/questions/63179536/convert-a-simple-mono-drawing-image-to-a-2d-text-array