Get the Middle/Beginning/End arabic char in string

有些话、适合烂在心里 提交于 2021-02-16 05:29:51

问题


Most Arabic letters have multiple contextual forms. For example, the letter ب has the general Unicode code point 0628.

But if the letter comes at the beginning of a word, it takes this form: بـ‎ unicode FE91.

Middle = ـبـ‎ unicode FE92.

End of the word = ـب‎‎ unicode FE90.

I'm trying to get the code of the contextual form, but I always get the general code point.

{ Demonstrates the problem: indexing a string always yields the general
  code point of a letter, never its positional presentation form. }
procedure TfMain.btn2Click(Sender: TObject);
const
  SampleWord   = 'يبداء';
  SampleLetter = 'ب';
var
  LetterCode : Integer;
  SecondCode : Integer;
begin
  // The stand-alone letter: shows 0628, which is what the asker expects here.
  LetterCode := Ord(SampleLetter);
  ShowMessage(IntToHex(LetterCode, 4));

  // The same letter taken from the middle of a word: still shows 0628,
  // whereas the asker wants the middle form (FE92).
  SecondCode := Ord(SampleWord[2]);
  ShowMessage(IntToHex(SecondCode, 4));
end;

Q: How can I get the correct code for the char depending on its position in the word?

More information

https://en.wikipedia.org/wiki/Bet_(letter)#Arabic_b.C4.81.CA.BE https://en.wikipedia.org/wiki/Arabic_script_in_Unicode


回答1:


I came up with this as a complete workaround for now.

The function below returns the correct Char from the Unicode table, depending on the character's position in the passed string.

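This function takes the Arabic joining rules into account: a letter that follows a non-connecting letter (such as ا, د, ر or و) is treated as if it started a new word.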

type
  // Form table with three entries: General / Isolated / End.
  // Used for letters that have no Middle or Beginning form.
  T3Ch = array[0..2] of Char;
  // Form table with five entries: General / Isolated / End / Middle / Beginning.
  T5Ch = array[0..4] of Char;

{ Arabic — Unicode character table.
  Entry [0] of every row is the general code point (U+06xx) as it is stored in
  ordinary text; the remaining entries are the positional presentation forms
  (U+FExx). Note the order of the five-entry rows: Middle ([3]) comes before
  Beginning ([4]). The trailing comment on each row gives the Unicode letter name. }
const
  cAlif    : T3Ch = (#$0627, #$FE8D, #$FE8E); // alef  - General / Isolated / End
  cBa      : T5Ch = (#$0628, #$FE8F, #$FE90, #$FE92, #$FE91); // beh   - General / Isolated / End / Middle / Beginning
  cTa      : T5Ch = (#$062A, #$FE95, #$FE96, #$FE98, #$FE97); // teh
  cTha     : T5Ch = (#$062B, #$FE99, #$FE9A, #$FE9C, #$FE9B); // theh
  cGim     : T5Ch = (#$062C, #$FE9D, #$FE9E, #$FEA0, #$FE9F); // jeem
  c7a      : T5Ch = (#$062D, #$FEA1, #$FEA2, #$FEA4, #$FEA3); // hah
  c5a      : T5Ch = (#$062E, #$FEA5, #$FEA6, #$FEA8, #$FEA7); // khah
  cDal     : T3Ch = (#$062F, #$FEA9, #$FEAA); // dal
  cThal    : T3Ch = (#$0630, #$FEAB, #$FEAC); // thal
  cRa      : T3Ch = (#$0631, #$FEAD, #$FEAE); // reh
  cZa      : T3Ch = (#$0632, #$FEAF, #$FEB0); // zain
  cSin     : T5Ch = (#$0633, #$FEB1, #$FEB2, #$FEB4, #$FEB3); // seen
  cShin    : T5Ch = (#$0634, #$FEB5, #$FEB6, #$FEB8, #$FEB7); // sheen
  cSad     : T5Ch = (#$0635, #$FEB9, #$FEBA, #$FEBC, #$FEBB); // sad
  cDad     : T5Ch = (#$0636, #$FEBD, #$FEBE, #$FEC0, #$FEBF); // dad
  c6a      : T5Ch = (#$0637, #$FEC1, #$FEC2, #$FEC4, #$FEC3); // tah
  c6_a     : T5Ch = (#$0638, #$FEC5, #$FEC6, #$FEC8, #$FEC7); // zah
  cAyn     : T5Ch = (#$0639, #$FEC9, #$FECA, #$FECC, #$FECB); // ain
  cGayn    : T5Ch = (#$063A, #$FECD, #$FECE, #$FED0, #$FECF); // ghain
  cFa      : T5Ch = (#$0641, #$FED1, #$FED2, #$FED4, #$FED3); // feh
  cQaf     : T5Ch = (#$0642, #$FED5, #$FED6, #$FED8, #$FED7); // qaf
  cKaf     : T5Ch = (#$0643, #$FED9, #$FEDA, #$FEDC, #$FEDB); // kaf
  cLam     : T5Ch = (#$0644, #$FEDD, #$FEDE, #$FEE0, #$FEDF); // lam
  cMim     : T5Ch = (#$0645, #$FEE1, #$FEE2, #$FEE4, #$FEE3); // meem
  cNun     : T5Ch = (#$0646, #$FEE5, #$FEE6, #$FEE8, #$FEE7); // noon
  cHa      : T5Ch = (#$0647, #$FEE9, #$FEEA, #$FEEC, #$FEEB); // heh
  cWaw     : T3Ch = (#$0648, #$FEED, #$FEEE); // waw
  cYa      : T5Ch = (#$064A, #$FEF1, #$FEF2, #$FEF4, #$FEF3); // yeh
  cAlifMad : T3Ch = (#$0622, #$FE81, #$FE82); // alef with madda above
  cTaMar   : T3Ch = (#$0629, #$FE93, #$FE94); // teh marbuta
  cAlifMaq : T3Ch = (#$0649, #$FEEF, #$FEF0); // alef maksura

type
  TChAsEnd = array[0..11] of Char;
const
  { Letters that never connect to the letter that follows them. A letter
    placed after one of these is shaped as if it started a new word.
    The first seven entries are the original set (alef, dal, thal, reh, zain,
    waw, alef with madda). The remaining five were missing and caused a letter
    following them to be shaped as joined: hamza, alef with hamza above,
    waw with hamza above, alef with hamza below and teh marbuta. }
  cChAsEnd :   TChAsEnd = (#$0627, #$062F, #$0630, #$0631, #$0632, #$0648, #$0622,
                           #$0621, #$0623, #$0624, #$0625, #$0629);

{ Reports whether character C occurs anywhere in the open array ChSet.
  Returns False for an empty array. }
function ChrInSet(C: Char; const ChSet: array of Char): Boolean;
var
  Idx: Integer;
begin
  // Linear scan; leave as soon as the first match is found.
  for Idx := Low(ChSet) to High(ChSet) do
    if ChSet[Idx] = C then
      Exit(True);
  Result := False;
end;

{ Selects the positional form of a letter that has all five forms.
  Forms layout: [0] General, [1] Isolated, [2] End, [3] Middle, [4] Beginning.
  Priority is Beginning, then End, then Middle, so a one-letter string
  yields the Beginning form. }
function _DualJoinForm(const Forms: T5Ch; CharPos, Len: Integer): Char;
begin
  if CharPos = 1 then
    Result := Forms[4]
  else if CharPos = Len then
    Result := Forms[2]
  else
    Result := Forms[3];
end;

{ Selects the positional form of a letter that has only three forms.
  Forms layout: [0] General, [1] Isolated, [2] End.
  At position 1 the general code point is kept; anywhere else the End form
  is used. }
function _RightJoinForm(const Forms: T3Ch; CharPos: Integer): Char;
begin
  if CharPos = 1 then
    Result := Forms[0]
  else
    Result := Forms[2];
end;

{ Returns the contextual (presentation-form) character for the letter at
  1-based index CharPos of AText.

  Parameters:
    AText   - the word to inspect; it is treated as a single word.
    CharPos - 1-based index of the character whose form is wanted.

  Returns:
    - #0 when CharPos is outside 1..Length(AText);
    - the character unchanged when it is not found in any letter table;
    - the character unchanged (general code point) when it is the last
      character and the preceding character is listed in cChAsEnd;
    - otherwise the Beginning / Middle / End form taken from the letter tables.

  A letter whose preceding character is listed in cChAsEnd is shaped as if it
  were at position 1, because that preceding letter does not connect to it. }
function _GetArabicChar(const AText: string; CharPos: Integer): Char;
var
  Ch  : Char;
  Len : Integer;
begin
  // Give Result a defined value on every path, including the early exits.
  Result := #0;

  Len := Length(AText);
  if (CharPos < 1) or (CharPos > Len) then
    Exit;

  Ch := AText[CharPos];

  // Only look at the previous character when there is one; at CharPos = 1
  // the index CharPos - 1 would be outside the string.
  if CharPos > 1 then begin
    if ChrInSet(AText[CharPos - 1], cChAsEnd) then begin
      if CharPos = Len then
        Exit(Ch);
      CharPos := 1; // previous letter does not connect: use the beginning form
    end;
  end;

  // Entry [0] of every table is unique, so at most one branch matches.
  if      Ch = cAlif[0]    then Result := _RightJoinForm(cAlif, CharPos)
  else if Ch = cBa[0]      then Result := _DualJoinForm(cBa, CharPos, Len)
  else if Ch = cTa[0]      then Result := _DualJoinForm(cTa, CharPos, Len)
  else if Ch = cTha[0]     then Result := _DualJoinForm(cTha, CharPos, Len)
  else if Ch = cGim[0]     then Result := _DualJoinForm(cGim, CharPos, Len)
  else if Ch = c7a[0]      then Result := _DualJoinForm(c7a, CharPos, Len)
  else if Ch = c5a[0]      then Result := _DualJoinForm(c5a, CharPos, Len)
  else if Ch = cDal[0]     then Result := _RightJoinForm(cDal, CharPos)
  else if Ch = cThal[0]    then Result := _RightJoinForm(cThal, CharPos)
  else if Ch = cRa[0]      then Result := _RightJoinForm(cRa, CharPos)
  else if Ch = cZa[0]      then Result := _RightJoinForm(cZa, CharPos)
  else if Ch = cSin[0]     then Result := _DualJoinForm(cSin, CharPos, Len)
  else if Ch = cShin[0]    then Result := _DualJoinForm(cShin, CharPos, Len)
  else if Ch = cSad[0]     then Result := _DualJoinForm(cSad, CharPos, Len)
  else if Ch = cDad[0]     then Result := _DualJoinForm(cDad, CharPos, Len)
  else if Ch = c6a[0]      then Result := _DualJoinForm(c6a, CharPos, Len)
  else if Ch = c6_a[0]     then Result := _DualJoinForm(c6_a, CharPos, Len)
  else if Ch = cAyn[0]     then Result := _DualJoinForm(cAyn, CharPos, Len)
  else if Ch = cGayn[0]    then Result := _DualJoinForm(cGayn, CharPos, Len)
  else if Ch = cFa[0]      then Result := _DualJoinForm(cFa, CharPos, Len)
  else if Ch = cQaf[0]     then Result := _DualJoinForm(cQaf, CharPos, Len)
  else if Ch = cKaf[0]     then Result := _DualJoinForm(cKaf, CharPos, Len)
  else if Ch = cLam[0]     then Result := _DualJoinForm(cLam, CharPos, Len)
  else if Ch = cMim[0]     then Result := _DualJoinForm(cMim, CharPos, Len)
  else if Ch = cNun[0]     then Result := _DualJoinForm(cNun, CharPos, Len)
  else if Ch = cHa[0]      then Result := _DualJoinForm(cHa, CharPos, Len)
  else if Ch = cWaw[0]     then Result := _RightJoinForm(cWaw, CharPos)
  else if Ch = cYa[0]      then Result := _DualJoinForm(cYa, CharPos, Len)
  else if Ch = cAlifMad[0] then Result := _RightJoinForm(cAlifMad, CharPos)
  else if Ch = cTaMar[0]   then Result := _RightJoinForm(cTaMar, CharPos)
  else if Ch = cAlifMaq[0] then Result := _RightJoinForm(cAlifMaq, CharPos)
  else
    Result := Ch; // not a letter covered by the tables: return it unchanged
end;

Usage and output are as follows:

{ Shows the hexadecimal code of the contextual form chosen by _GetArabicChar
  for the letter beh in four sample words. }
procedure TfMain.btn2Click(Sender: TObject);
const
  WordMiddle    = 'يبداء';
  WordBeginning = 'بداء';
  WordEndAfterAlef = 'ألعاب';
  WordEndJoined    = 'سحب';

  // Displays the 4-digit hex code of the shaped character at APos in AWord.
  procedure ShowShapedCode(const AWord: string; APos: Integer);
  var
    Shaped: Char;
  begin
    Shaped := _GetArabicChar(AWord, APos);
    ShowMessage(IntToHex(Ord(Shaped), 4));
  end;

begin
  ShowShapedCode(WordMiddle, 2);                           // shows FE92 (middle)
  ShowShapedCode(WordBeginning, 1);                        // shows FE91 (beginning)
  ShowShapedCode(WordEndJoined, Length(WordEndJoined));    // shows FE90 (end)
  ShowShapedCode(WordEndAfterAlef, Length(WordEndAfterAlef)); // shows 0628 (general, follows alef)
end;


来源:https://stackoverflow.com/questions/42402581/get-the-middle-beginning-end-arabic-char-in-string

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!