domingo, 22 de noviembre de 2009

Convertir código HTML a caracteres ASCII y viceversa

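En Java puede utilizar este programa para convertir texto cuyos caracteres españoles (y otros símbolos) están escritos como entidades HTML, por ejemplo &aacute;, a los caracteres correspondientes, y también para hacer la conversión inversa.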

Use el siguiente programa:


import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;

/**
 * Converts between HTML character references and the characters they denote.
 *
 * <p>{@link #decode(String)} resolves named references from the table below
 * (for example {@code &aacute;}) as well as decimal and hexadecimal numeric
 * references (for example {@code &#225;} and {@code &#xE1;}).
 * {@link #encode(String)} performs the opposite conversion for every character
 * that has a name in the table; all other characters are copied unchanged.
 *
 * <p>The lookup tables are filled once in the static initializer and are only
 * read afterwards.
 */
public class Convert {

  /** Text emitted by {@link #decode(String)} in place of a reference it cannot resolve. */
  private static final String UNKNOWN = "?";

  /** Entity name (without the leading ampersand and trailing semicolon) to its character. */
  private static final Map<String, String> entities = new HashMap<String, String>();

  /** Reverse table: character to entity name, so encoding needs one lookup per character. */
  private static final Map<Character, String> names = new HashMap<Character, String>();

  static {
    define("quot", '"');
    define("amp", '&');
    define("lt", '<');
    define("gt", '>');
    define("nbsp", '\u00A0');
    define("iexcl", '\u00A1');
    define("cent", '\u00A2');
    define("pound", '\u00A3');
    define("curren", '\u00A4');
    define("yen", '\u00A5');
    define("brvbar", '\u00A6');
    define("sect", '\u00A7');
    define("uml", '\u00A8');
    define("copy", '\u00A9');
    define("ordf", '\u00AA');
    define("laquo", '\u00AB');
    define("not", '\u00AC');
    define("shy", '\u00AD');
    define("reg", '\u00AE');
    define("macr", '\u00AF');
    define("deg", '\u00B0');
    define("plusmn", '\u00B1');
    define("sup2", '\u00B2');
    define("sup3", '\u00B3');
    define("acute", '\u00B4');
    define("micro", '\u00B5');
    define("para", '\u00B6');
    define("middot", '\u00B7');
    define("cedil", '\u00B8');
    define("sup1", '\u00B9');
    define("ordm", '\u00BA');
    define("raquo", '\u00BB');
    define("frac14", '\u00BC');
    define("frac12", '\u00BD');
    define("frac34", '\u00BE');
    define("iquest", '\u00BF');
    define("Agrave", '\u00C0');
    define("Aacute", '\u00C1');
    define("Acirc", '\u00C2');
    define("Atilde", '\u00C3');
    define("Auml", '\u00C4');
    define("Aring", '\u00C5');
    define("AElig", '\u00C6');
    define("Ccedil", '\u00C7');
    define("Egrave", '\u00C8');
    define("Eacute", '\u00C9');
    define("Ecirc", '\u00CA');
    define("Euml", '\u00CB');
    define("Igrave", '\u00CC');
    define("Iacute", '\u00CD');
    define("Icirc", '\u00CE');
    define("Iuml", '\u00CF');
    define("ETH", '\u00D0');
    define("Ntilde", '\u00D1');
    define("Ograve", '\u00D2');
    define("Oacute", '\u00D3');
    define("Ocirc", '\u00D4');
    define("Otilde", '\u00D5');
    define("Ouml", '\u00D6');
    define("times", '\u00D7');
    define("Oslash", '\u00D8');
    define("Ugrave", '\u00D9');
    define("Uacute", '\u00DA');
    define("Ucirc", '\u00DB');
    define("Uuml", '\u00DC');
    define("Yacute", '\u00DD');
    define("THORN", '\u00DE');
    define("szlig", '\u00DF');
    define("agrave", '\u00E0');
    define("aacute", '\u00E1');
    define("acirc", '\u00E2');
    define("atilde", '\u00E3');
    define("auml", '\u00E4');
    define("aring", '\u00E5');
    define("aelig", '\u00E6');
    define("ccedil", '\u00E7');
    define("egrave", '\u00E8');
    define("eacute", '\u00E9');
    define("ecirc", '\u00EA');
    define("euml", '\u00EB');
    define("igrave", '\u00EC');
    define("iacute", '\u00ED');
    define("icirc", '\u00EE');
    define("iuml", '\u00EF');
    define("eth", '\u00F0');
    define("ntilde", '\u00F1');
    define("ograve", '\u00F2');
    define("oacute", '\u00F3');
    define("ocirc", '\u00F4');
    define("otilde", '\u00F5');
    define("ouml", '\u00F6');
    define("divide", '\u00F7');
    define("oslash", '\u00F8');
    define("ugrave", '\u00F9');
    define("uacute", '\u00FA');
    define("ucirc", '\u00FB');
    define("uuml", '\u00FC');
    define("yacute", '\u00FD');
    define("thorn", '\u00FE');
    define("yuml", '\u00FF');
  }

  /**
   * Registers one named entity in both lookup tables.
   *
   * <p>If a character is registered under several names, the first name wins
   * for encoding, which keeps the output independent of map iteration order.
   *
   * @param name entity name without the ampersand and semicolon
   * @param ch   character the entity stands for
   */
  private static void define(String name, char ch) {
    entities.put(name, String.valueOf(ch));
    if (!names.containsKey(ch)) {
      names.put(ch, name);
    }
  }

  /**
   * Replaces every {@code &...;} reference in {@code str} with its character.
   *
   * <p>A reference runs from an ampersand to the next semicolon. References
   * that cannot be resolved (unknown name, empty reference, malformed or
   * out-of-range number) are replaced by {@code ?}. An ampersand with no
   * semicolon after it is copied through unchanged together with the rest of
   * the input.
   *
   * @param str text that may contain HTML character references; must not be null
   * @return the text with all references replaced
   * @throws NullPointerException if {@code str} is null
   */
  public static String decode(String str) {
    StringBuilder out = new StringBuilder(str.length());
    int pos = 0;

    while (pos < str.length()) {
      int amp = str.indexOf('&', pos);
      if (amp == -1) {
        out.append(str, pos, str.length());
        break;
      }
      out.append(str, pos, amp);

      int semi = str.indexOf(';', amp);
      if (semi == -1) {
        // Unterminated reference: keep the remaining text literally.
        out.append(str, amp, str.length());
        break;
      }

      out.append(resolve(str.substring(amp + 1, semi)));
      pos = semi + 1;
    }
    return out.toString();
  }

  /**
   * Resolves the text found between an ampersand and a semicolon.
   *
   * @param token reference body, possibly empty
   * @return the decoded character(s), or {@code ?} if the reference is not recognised
   */
  private static String resolve(String token) {
    // An empty body ("&;") used to crash on charAt(0); treat it as unknown.
    if (token.length() == 0) {
      return UNKNOWN;
    }
    if (token.charAt(0) == '#') {
      return resolveNumeric(token.substring(1));
    }
    String value = entities.get(token);
    return value != null ? value : UNKNOWN;
  }

  /**
   * Resolves the part of a numeric reference that follows the {@code #}.
   *
   * <p>A leading {@code x} or {@code X} selects hexadecimal; otherwise the
   * digits are read as decimal. The value is treated as a Unicode code point,
   * so values above U+FFFF yield a surrogate pair instead of being truncated.
   *
   * @param digits text after the {@code #}, possibly empty
   * @return the decoded character(s), or {@code ?} if the number is malformed
   *         or is not a valid code point
   */
  private static String resolveNumeric(String digits) {
    int radix = 10;
    if (digits.length() > 0 && (digits.charAt(0) == 'x' || digits.charAt(0) == 'X')) {
      radix = 16;
      digits = digits.substring(1);
    }
    try {
      int codePoint = Integer.parseInt(digits, radix);
      if (!Character.isValidCodePoint(codePoint)) {
        return UNKNOWN;
      }
      return new String(Character.toChars(codePoint));
    } catch (NumberFormatException ignored) {
      // Empty or non-numeric body: report it the same way as an unknown name.
      return UNKNOWN;
    }
  }

  /**
   * Replaces every character of {@code str} that has a named entity with the
   * corresponding {@code &name;} reference; other characters are copied as is.
   *
   * @param str plain text; must not be null
   * @return the text with all known characters written as named references
   * @throws NullPointerException if {@code str} is null
   */
  public static String encode(String str) {
    StringBuilder out = new StringBuilder(str.length());

    for (int i = 0; i < str.length(); i++) {
      char ch = str.charAt(i);
      String name = names.get(ch);
      if (name != null) {
        out.append('&').append(name).append(';');
      } else {
        out.append(ch);
      }
    }
    return out.toString();
  }

  /**
   * Demonstrates both conversions by printing a sample before and after
   * decoding, and another sample before and after encoding.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String htmlCode = "&lt;p&gt;Es un p&aacute;rrafo&lt;/p&gt;";
    System.out.println(htmlCode);
    System.out.println(decode(htmlCode));
    String html = "<b>Año del árbol</b>";
    System.out.println(html);
    System.out.println(encode(html));
  }

}

Con lo cual resulta:


&lt;p&gt;Es un p&aacute;rrafo&lt;/p&gt;
<p>Es un párrafo</p>

<b>Año del árbol</b>
&lt;b&gt;A&ntilde;o del &aacute;rbol&lt;/b&gt;
Compartir:

0 comentarios:

Publicar un comentario