Diskuse JPW: PHP Wordpress plugin a kódování

	Autor	Zpráva
	Kocicadlo Profil	#1 · Zasláno: 10. 7. 2013, 14:18:45 · Upravil/a: Kocicadlo o 8 minut později Odpovědět Citovat Zdravím, nainstaloval jsem si plugin Easy Table ( http://wordpress.org/plugins/easy-table/ ) do Wordpressu a dělá mi nepříjemnou věc, v počátečním slově nového řádku tabulky nezobrazuje písmena s diakritikou, viz. http://cernyjelen.cz/napojovy-listek např. slova červená nebo čaje. Používá soubor Encoding.php kde je pravděpodobně zakopán pes, kód níže, pokud by mi byl někdo schopen poradit, budu velmi vděčen, děkuji moc. Pavel P.S. je mi záhadou, že se tak děje pouze na začátku řádku, dále už je diakritika v pořádku. <?php /** * @author "Sebastian Grignoli" <grignoli@framework2.com.ar> * @package Encoding * @version 1.1 * @link http://www.framework2.com.ar/dzone/forceUTF8-es/ * @example http://www.framework2.com.ar/dzone/forceUTF8-es/ * Class renamed to ForceEncode by "takien" <contact@takien.com> / class ForceEncode { protected static $win1252ToUtf8 = array( 128 => "\xe2\x82\xac", 130 => "\xe2\x80\x9a", 131 => "\xc6\x92", 132 => "\xe2\x80\x9e", 133 => "\xe2\x80\xa6", 134 => "\xe2\x80\xa0", 135 => "\xe2\x80\xa1", 136 => "\xcb\x86", 137 => "\xe2\x80\xb0", 138 => "\xc5\xa0", 139 => "\xe2\x80\xb9", 140 => "\xc5\x92", 142 => "\xc5\xbd", 145 => "\xe2\x80\x98", 146 => "\xe2\x80\x99", 147 => "\xe2\x80\x9c", 148 => "\xe2\x80\x9d", 149 => "\xe2\x80\xa2", 150 => "\xe2\x80\x93", 151 => "\xe2\x80\x94", 152 => "\xcb\x9c", 153 => "\xe2\x84\xa2", 154 => "\xc5\xa1", 155 => "\xe2\x80\xba", 156 => "\xc5\x93", 158 => "\xc5\xbe", 159 => "\xc5\xb8" ); protected static $brokenUtf8ToUtf8 = array( "\xc2\x80" => "\xe2\x82\xac", "\xc2\x82" => "\xe2\x80\x9a", "\xc2\x83" => "\xc6\x92", "\xc2\x84" => "\xe2\x80\x9e", "\xc2\x85" => "\xe2\x80\xa6", "\xc2\x86" => "\xe2\x80\xa0", "\xc2\x87" => "\xe2\x80\xa1", "\xc2\x88" => "\xcb\x86", "\xc2\x89" => "\xe2\x80\xb0", "\xc2\x8a" => "\xc5\xa0", "\xc2\x8b" => "\xe2\x80\xb9", "\xc2\x8c" => "\xc5\x92", "\xc2\x8e" => "\xc5\xbd", "\xc2\x91" => 
"\xe2\x80\x98", "\xc2\x92" => "\xe2\x80\x99", "\xc2\x93" => "\xe2\x80\x9c", "\xc2\x94" => "\xe2\x80\x9d", "\xc2\x95" => "\xe2\x80\xa2", "\xc2\x96" => "\xe2\x80\x93", "\xc2\x97" => "\xe2\x80\x94", "\xc2\x98" => "\xcb\x9c", "\xc2\x99" => "\xe2\x84\xa2", "\xc2\x9a" => "\xc5\xa1", "\xc2\x9b" => "\xe2\x80\xba", "\xc2\x9c" => "\xc5\x93", "\xc2\x9e" => "\xc5\xbe", "\xc2\x9f" => "\xc5\xb8" ); protected static $utf8ToWin1252 = array( "\xe2\x82\xac" => "\x80", "\xe2\x80\x9a" => "\x82", "\xc6\x92" => "\x83", "\xe2\x80\x9e" => "\x84", "\xe2\x80\xa6" => "\x85", "\xe2\x80\xa0" => "\x86", "\xe2\x80\xa1" => "\x87", "\xcb\x86" => "\x88", "\xe2\x80\xb0" => "\x89", "\xc5\xa0" => "\x8a", "\xe2\x80\xb9" => "\x8b", "\xc5\x92" => "\x8c", "\xc5\xbd" => "\x8e", "\xe2\x80\x98" => "\x91", "\xe2\x80\x99" => "\x92", "\xe2\x80\x9c" => "\x93", "\xe2\x80\x9d" => "\x94", "\xe2\x80\xa2" => "\x95", "\xe2\x80\x93" => "\x96", "\xe2\x80\x94" => "\x97", "\xcb\x9c" => "\x98", "\xe2\x84\xa2" => "\x99", "\xc5\xa1" => "\x9a", "\xe2\x80\xba" => "\x9b", "\xc5\x93" => "\x9c", "\xc5\xbe" => "\x9e", "\xc5\xb8" => "\x9f" ); static function toUTF8($text){ /* * Function Encoding::toUTF8 * * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8. * * It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1. * * It may fail to convert characters to UTF-8 if they fall into one of these scenarios: * * 1) when any of these characters: Ă€ĂĂ‚ĂƒĂ„Ă…Ă†Ă‡ĂˆĂ‰ĂŠĂ‹ĂŚĂŤĂŽĂŹĂĂ‘Ă’Ă“Ă”Ă•Ă–Ă—Ă˜Ă™ĂšĂ›ĂśĂťĂžĂź * are followed by any of these: ("group B") * ÂˇÂ˘ÂŁÂ¤ÂĄÂ¦Â§Â¨Â©ÂŞÂ«Â¬ÂÂ®ÂŻÂ°Â±Â˛ÂłÂ´ÂµÂ¶â€˘Â¸ÂąÂşÂ»ÂĽÂ˝ÂľÂż * For example: %ABREPRESENT%C9%BB. Â«REPRESENTĂ‰Â» * The "Â«" (%AB) character will be converted, but the "Ă‰" followed by "Â»" (%C9%BB) * is also a valid unicode character, and will be left unchanged. 
* * 2) when any of these: Ă ĂˇĂ˘ĂŁĂ¤ĂĄĂ¦Ă§Ă¨Ă©ĂŞĂ«Ă¬ĂĂ®ĂŻ are followed by TWO chars from group B, * 3) when any of these: Ă°Ă±Ă˛Ăł are followed by THREE chars from group B. * * @name toUTF8 * @param string $text Any string. * @return string The same string, UTF8 encoded * */ if(is_array($text)) { foreach($text as $k => $v) { $text[$k] = self::toUTF8($v); } return $text; } elseif(is_string($text)) { $max = strlen($text); $buf = ""; for($i = 0; $i < $max; $i++){ $c1 = $text{$i}; if($c1>="\xc0"){ //Should be converted to UTF8, if it's not UTF8 already $c2 = $i+1 >= $max? "\x00" : $text{$i+1}; $c3 = $i+2 >= $max? "\x00" : $text{$i+2}; $c4 = $i+3 >= $max? "\x00" : $text{$i+3}; if($c1 >= "\xc0" & $c1 <= "\xdf"){ //looks like 2 bytes UTF8 if($c2 >= "\x80" && $c2 <= "\xbf"){ //yeah, almost sure it's UTF8 already $buf .= $c1 . $c2; $i++; } else { //not valid UTF8. Convert it. $cc1 = (chr(ord($c1) / 64) \| "\xc0"); $cc2 = ($c1 & "\x3f") \| "\x80"; $buf .= $cc1 . $cc2; } } elseif($c1 >= "\xe0" & $c1 <= "\xef"){ //looks like 3 bytes UTF8 if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf"){ //yeah, almost sure it's UTF8 already $buf .= $c1 . $c2 . $c3; $i = $i + 2; } else { //not valid UTF8. Convert it. $cc1 = (chr(ord($c1) / 64) \| "\xc0"); $cc2 = ($c1 & "\x3f") \| "\x80"; $buf .= $cc1 . $cc2; } } elseif($c1 >= "\xf0" & $c1 <= "\xf7"){ //looks like 4 bytes UTF8 if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf"){ //yeah, almost sure it's UTF8 already $buf .= $c1 . $c2 . $c3; $i = $i + 2; } else { //not valid UTF8. Convert it. $cc1 = (chr(ord($c1) / 64) \| "\xc0"); $cc2 = ($c1 & "\x3f") \| "\x80"; $buf .= $cc1 . $cc2; } } else { //doesn't look like UTF8, but should be converted $cc1 = (chr(ord($c1) / 64) \| "\xc0"); $cc2 = (($c1 & "\x3f") \| "\x80"); $buf .= $cc1 . 
$cc2; } } elseif(($c1 & "\xc0") == "\x80"){ // needs conversion if(isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases $buf .= self::$win1252ToUtf8[ord($c1)]; } else { $cc1 = (chr(ord($c1) / 64) \| "\xc0"); $cc2 = (($c1 & "\x3f") \| "\x80"); $buf .= $cc1 . $cc2; } } else { // it doesn't need convesion $buf .= $c1; } } return $buf; } else { return $text; } } static function toWin1252($text) { if(is_array($text)) { foreach($text as $k => $v) { $text[$k] = self::toWin1252($v); } return $text; } elseif(is_string($text)) { return utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text))); } else { return $text; } } static function toISO8859($text) { return self::toWin1252($text); } static function toLatin1($text) { return self::toWin1252($text); } static function fixUTF8($text){ if(is_array($text)) { foreach($text as $k => $v) { $text[$k] = self::fixUTF8($v); } return $text; } $last = ""; while($last <> $text){ $last = $text; $text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text))); } $text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text))); return $text; } static function UTF8FixWin1252Chars($text){ // If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1 // (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. // See: http://en.wikipedia.org/wiki/Windows-1252 return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text); } static function removeBOM($str=""){ if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) { $str=substr($str, 3); } return $str; } }
	Časová prodleva: 11 let

Autor

Zpráva

Kocicadlo
Profil

#1 · Zasláno: 10. 7. 2013, 14:18:45 · Upravil/a: Kocicadlo o 8 minut později

Zdravím, nainstaloval jsem si plugin Easy Table ( http://wordpress.org/plugins/easy-table/ ) do WordPressu a dělá mi nepříjemnou věc: v počátečním slově nového řádku tabulky nezobrazuje písmena s diakritikou, viz http://cernyjelen.cz/napojovy-listek, např. slova „červená“ nebo „čaje“. Plugin používá soubor Encoding.php, kde je pravděpodobně zakopaný pes (kód níže). Pokud by mi byl někdo schopen poradit, budu velmi vděčen. Děkuji moc. Pavel

P.S. je mi záhadou, že se tak děje pouze na začátku řádku, dále už je diakritika v pořádku.

<?php 
/**
 * @author   "Sebastian Grignoli" <grignoli@framework2.com.ar>
 * @package  Encoding
 * @version  1.1
 * @link     http://www.framework2.com.ar/dzone/forceUTF8-es/
 * @example  http://www.framework2.com.ar/dzone/forceUTF8-es/
 * Class renamed to ForceEncode by "takien" <contact@takien.com>
 */

/**
 * Best-effort converter between Windows-1252 / ISO-8859-1 and UTF-8.
 *
 * All methods are static and work on raw byte strings; arrays are converted
 * element by element and other non-string values are returned untouched.
 * The code deliberately avoids syntax newer than PHP 5.3 so that it keeps
 * loading on old hosts as well as on PHP 8.
 */
class ForceEncode
{
    /**
     * Windows-1252 bytes in the 0x80-0x9F range, keyed by ordinal, mapped to
     * the UTF-8 sequence of the character they represent. Ordinals 129, 141,
     * 143, 144 and 157 have no entry and fall back to the Latin-1 conversion.
     */
    protected static $win1252ToUtf8 = array(
        128 => "\xe2\x82\xac",

        130 => "\xe2\x80\x9a",
        131 => "\xc6\x92",
        132 => "\xe2\x80\x9e",
        133 => "\xe2\x80\xa6",
        134 => "\xe2\x80\xa0",
        135 => "\xe2\x80\xa1",
        136 => "\xcb\x86",
        137 => "\xe2\x80\xb0",
        138 => "\xc5\xa0",
        139 => "\xe2\x80\xb9",
        140 => "\xc5\x92",

        142 => "\xc5\xbd",


        145 => "\xe2\x80\x98",
        146 => "\xe2\x80\x99",
        147 => "\xe2\x80\x9c",
        148 => "\xe2\x80\x9d",
        149 => "\xe2\x80\xa2",
        150 => "\xe2\x80\x93",
        151 => "\xe2\x80\x94",
        152 => "\xcb\x9c",
        153 => "\xe2\x84\xa2",
        154 => "\xc5\xa1",
        155 => "\xe2\x80\xba",
        156 => "\xc5\x93",

        158 => "\xc5\xbe",
        159 => "\xc5\xb8"
    );

    /**
     * UTF-8 encodings of the C1 control code points U+0080-U+009F (what you
     * get when Windows-1252 text is converted as if it were ISO-8859-1),
     * mapped to the UTF-8 sequence of the intended Windows-1252 character.
     */
    protected static $brokenUtf8ToUtf8 = array(
        "\xc2\x80" => "\xe2\x82\xac",

        "\xc2\x82" => "\xe2\x80\x9a",
        "\xc2\x83" => "\xc6\x92",
        "\xc2\x84" => "\xe2\x80\x9e",
        "\xc2\x85" => "\xe2\x80\xa6",
        "\xc2\x86" => "\xe2\x80\xa0",
        "\xc2\x87" => "\xe2\x80\xa1",
        "\xc2\x88" => "\xcb\x86",
        "\xc2\x89" => "\xe2\x80\xb0",
        "\xc2\x8a" => "\xc5\xa0",
        "\xc2\x8b" => "\xe2\x80\xb9",
        "\xc2\x8c" => "\xc5\x92",

        "\xc2\x8e" => "\xc5\xbd",


        "\xc2\x91" => "\xe2\x80\x98",
        "\xc2\x92" => "\xe2\x80\x99",
        "\xc2\x93" => "\xe2\x80\x9c",
        "\xc2\x94" => "\xe2\x80\x9d",
        "\xc2\x95" => "\xe2\x80\xa2",
        "\xc2\x96" => "\xe2\x80\x93",
        "\xc2\x97" => "\xe2\x80\x94",
        "\xc2\x98" => "\xcb\x9c",
        "\xc2\x99" => "\xe2\x84\xa2",
        "\xc2\x9a" => "\xc5\xa1",
        "\xc2\x9b" => "\xe2\x80\xba",
        "\xc2\x9c" => "\xc5\x93",

        "\xc2\x9e" => "\xc5\xbe",
        "\xc2\x9f" => "\xc5\xb8"
    );

    /**
     * Inverse of $win1252ToUtf8: UTF-8 sequence => single Windows-1252 byte.
     */
    protected static $utf8ToWin1252 = array(
        "\xe2\x82\xac" => "\x80",

        "\xe2\x80\x9a" => "\x82",
        "\xc6\x92"     => "\x83",
        "\xe2\x80\x9e" => "\x84",
        "\xe2\x80\xa6" => "\x85",
        "\xe2\x80\xa0" => "\x86",
        "\xe2\x80\xa1" => "\x87",
        "\xcb\x86"     => "\x88",
        "\xe2\x80\xb0" => "\x89",
        "\xc5\xa0"     => "\x8a",
        "\xe2\x80\xb9" => "\x8b",
        "\xc5\x92"     => "\x8c",

        "\xc5\xbd"     => "\x8e",


        "\xe2\x80\x98" => "\x91",
        "\xe2\x80\x99" => "\x92",
        "\xe2\x80\x9c" => "\x93",
        "\xe2\x80\x9d" => "\x94",
        "\xe2\x80\xa2" => "\x95",
        "\xe2\x80\x93" => "\x96",
        "\xe2\x80\x94" => "\x97",
        "\xcb\x9c"     => "\x98",
        "\xe2\x84\xa2" => "\x99",
        "\xc5\xa1"     => "\x9a",
        "\xe2\x80\xba" => "\x9b",
        "\xc5\x93"     => "\x9c",

        "\xc5\xbe"     => "\x9e",
        "\xc5\xb8"     => "\x9f"
    );

    /**
     * Convert a string to UTF-8, leaving byte sequences that already look
     * like UTF-8 alone.
     *
     * Every byte that is not part of a well-formed 2-, 3- or 4-byte sequence
     * is treated as Windows-1252 (for 0x80-0x9F bytes present in
     * $win1252ToUtf8) or ISO-8859-1 (everything else) and re-encoded.
     *
     * Known limitation: the detection is purely structural, so genuine
     * Windows-1252 / Latin-1 text is left unconverted when it happens to
     * form a well-formed UTF-8 sequence:
     *  1) a byte 0xC0-0xDF followed by one byte in 0x80-0xBF,
     *  2) a byte 0xE0-0xEF followed by two bytes in 0x80-0xBF,
     *  3) a byte 0xF0-0xF7 followed by three bytes in 0x80-0xBF.
     * Example: in the Latin-1 input %ABREPRESENT%C9%BB the leading %AB is
     * converted, but %C9%BB is itself a valid 2-byte UTF-8 sequence and is
     * therefore left unchanged.
     *
     * @param mixed $text String, array of values (converted recursively), or anything else.
     * @return mixed The converted string/array; non-string scalars and objects are returned as is.
     */
    public static function toUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        $max = strlen($text);
        $buf = '';
        for ($i = 0; $i < $max; $i++) {
            $c1 = $text[$i];
            $ord = ord($c1);

            if ($ord < 0x80) {
                // Plain ASCII is identical in every encoding handled here.
                $buf .= $c1;
                continue;
            }

            if ($ord < 0xC0) {
                // A continuation-range byte with no lead byte in front of it:
                // interpret it as Windows-1252 first, Latin-1 otherwise.
                $buf .= isset(self::$win1252ToUtf8[$ord])
                    ? self::$win1252ToUtf8[$ord]
                    : self::latin1ByteToUtf8($c1);
                continue;
            }

            // Lead byte: work out how many continuation bytes a UTF-8
            // sequence starting with it would need (0 = not a valid lead).
            if ($ord <= 0xDF) {
                $trail = 1;
            } elseif ($ord <= 0xEF) {
                $trail = 2;
            } elseif ($ord <= 0xF7) {
                $trail = 3;
            } else {
                $trail = 0;
            }

            if ($trail > 0 && self::hasContinuationBytes($text, $i + 1, $trail, $max)) {
                // Already UTF-8: copy the WHOLE sequence and skip past it,
                // so none of its bytes is re-interpreted on the next pass.
                $buf .= substr($text, $i, $trail + 1);
                $i += $trail;
            } else {
                $buf .= self::latin1ByteToUtf8($c1);
            }
        }

        return $buf;
    }

    /**
     * Convert text (in any encoding toUTF8() understands) to Windows-1252.
     *
     * Characters outside Windows-1252 come out as whatever utf8_decode()
     * produces for them.
     *
     * @param mixed $text String, array of values (converted recursively), or anything else.
     * @return mixed The converted string/array; other values are returned as is.
     */
    public static function toWin1252($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::toWin1252($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            return $text;
        }

        return self::utf8ToWin1252Bytes(self::toUTF8($text));
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toISO8859($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Alias of toWin1252().
     *
     * @param mixed $text
     * @return mixed
     */
    public static function toLatin1($text)
    {
        return self::toWin1252($text);
    }

    /**
     * Repair UTF-8 that has been encoded to UTF-8 more than once.
     *
     * The text is repeatedly decoded to Windows-1252 and passed back through
     * toUTF8() until a pass no longer changes it.
     *
     * @param mixed $text String, array of values (fixed recursively), or anything else.
     * @return mixed The repaired string/array; other values are returned as is.
     */
    public static function fixUTF8($text)
    {
        if (is_array($text)) {
            foreach ($text as $k => $v) {
                $text[$k] = self::fixUTF8($v);
            }
            return $text;
        }
        if (!is_string($text)) {
            // Same contract as toUTF8()/toWin1252(): leave non-strings alone.
            return $text;
        }

        // Strict comparison on purpose: a loose one treats numeric-looking
        // strings such as "1e3" and "1000" as equal and could stop early.
        do {
            $previous = $text;
            $text = self::toUTF8(self::utf8ToWin1252Bytes($text));
        } while ($previous !== $text);

        return $text;
    }

    /**
     * Fix UTF-8 that was produced from Windows-1252 input as if it had been
     * ISO-8859-1 (i.e. the 0x80-0x9F bytes became C1 control characters).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string|array $text Value handed straight to str_replace().
     * @return string|array
     */
    public static function UTF8FixWin1252Chars($text)
    {
        return str_replace(
            array_keys(self::$brokenUtf8ToUtf8),
            array_values(self::$brokenUtf8ToUtf8),
            $text
        );
    }

    /**
     * Strip a leading UTF-8 byte order mark (EF BB BF), if present.
     *
     * @param string $str
     * @return string
     */
    public static function removeBOM($str = "")
    {
        if (substr($str, 0, 3) === "\xef\xbb\xbf") {
            // The cast keeps the result a string on PHP < 8, where substr()
            // returns false when the input consists of the BOM alone.
            return (string) substr($str, 3);
        }
        return $str;
    }

    /**
     * Re-encode one Windows-1252 / ISO-8859-1 byte in 0x80-0xFF as the
     * two-byte UTF-8 sequence of the code point with the same number.
     *
     * @param string $byte Exactly one byte.
     * @return string
     */
    private static function latin1ByteToUtf8($byte)
    {
        $ord = ord($byte);
        // Integer shift instead of "/ 64": chr() must not be fed a float.
        return chr(0xC0 | ($ord >> 6)) . chr(0x80 | ($ord & 0x3F));
    }

    /**
     * Tell whether $count bytes starting at $start all lie in 0x80-0xBF.
     *
     * @param string $text
     * @param int    $start  Offset of the first byte to check.
     * @param int    $count  Number of bytes to check.
     * @param int    $length strlen($text), passed in to avoid recomputing it.
     * @return bool False as well when the string ends before $count bytes.
     */
    private static function hasContinuationBytes($text, $start, $count, $length)
    {
        if ($start + $count > $length) {
            return false;
        }
        for ($j = $start; $j < $start + $count; $j++) {
            if (($text[$j] & "\xc0") !== "\x80") {
                return false;
            }
        }
        return true;
    }

    /**
     * Decode UTF-8 to Windows-1252 bytes.
     *
     * The sequences listed in $utf8ToWin1252 are mapped directly to their
     * single byte; everything between them goes through utf8_decode().
     * The two steps are kept apart because utf8_decode() replaces bytes that
     * are not valid UTF-8 with "?", which is exactly what a freshly mapped
     * lone 0x80-0x9F byte would be.
     *
     * NOTE(review): utf8_decode() is deprecated as of PHP 8.2; it is kept so
     * that the handling of all other input stays as it was.
     *
     * @param string $utf8
     * @return string
     */
    private static function utf8ToWin1252Bytes($utf8)
    {
        $alternatives = array();
        foreach (array_keys(self::$utf8ToWin1252) as $sequence) {
            $alternatives[] = preg_quote($sequence, '/');
        }

        // No "u" modifier: the subject is matched byte-wise and may not be valid UTF-8.
        $parts = preg_split(
            '/(' . implode('|', $alternatives) . ')/',
            $utf8,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            // PCRE failure: fall back to the single-pass conversion.
            return utf8_decode(str_replace(
                array_keys(self::$utf8ToWin1252),
                array_values(self::$utf8ToWin1252),
                $utf8
            ));
        }

        $out = '';
        foreach ($parts as $part) {
            if (isset(self::$utf8ToWin1252[$part])) {
                $out .= self::$utf8ToWin1252[$part];
            } elseif ($part !== '') {
                $out .= utf8_decode($part);
            }
        }
        return $out;
    }
}

Časová prodleva: 11 let

Vaše odpověď

Mohlo by se hodit