Convert html entities

Status
Not open for further replies.

upark

New Member
Licensed User
Longtime User
Is there any function or library to decode html entities?

for example,

input string => "hello, &lt;world&gt;"

output string => "hello, <world>"
 

Erel

B4X founder
Staff member
Licensed User
Longtime User
Upvote 0

NeoTechni

Well-Known Member
Licensed User
Longtime User
In case anyone still needs this (I did)
Erel: Your link helped, it's how I got the info I needed

B4X:
'Returns True when Text is a non-empty string made only of hexadecimal digits
'(0-9, a-f, A-F); returns False otherwise.
'An empty string is rejected so that a True result can be handed to a hex parser,
'which has nothing to parse in an empty value.
private Sub isHex(Text As String) As Boolean
   If Text.Length = 0 Then Return False
   Dim i As Int, Code As Int
   For i = 0 To Text.Length-1
       Code = Asc(Text.CharAt(i))
       If Code >= Asc("0") And Code <= Asc("9") Then
           'decimal digit
       Else If Code >= Asc("a") And Code <= Asc("f") Then
           'lower case hex letter
       Else If Code >= Asc("A") And Code <= Asc("F") Then
           'upper case hex letter
       Else
           Return False
       End If
   Next
   Return True
End Sub
'Returns True when every character of Text lies in the range "a" to "z".
'Digits, upper case letters and any other character make the result False.
'An empty Text has no offending character, so the result is True.
private Sub IsLcase(Text As String) As Boolean
   Dim LowerBound As Int = Asc("a")
   Dim UpperBound As Int = Asc("z")
   Dim Position As Int = 0
   Do While Position < Text.Length
       Dim Code As Int = Asc(Text.CharAt(Position))
       If Code < LowerBound Or Code > UpperBound Then Return False
       Position = Position + 1
   Loop
   Return True
End Sub
'Inline-if helper: returns IFtrue when Value is True, otherwise IfFalse.
'Both candidates are evaluated by the caller before this Sub runs.
private Sub IIF(Value As Boolean , IFtrue As Object, IfFalse As Object) As Object
   If Value = False Then
       Return IfFalse
   Else
       Return IFtrue
   End If
End Sub
'Decodes HTML character references found in Text and returns the decoded string.
'Three forms are recognised, each running from "&" to the next ";":
'  &#nnnn;   decimal code point
'  &#xhhhh;  hexadecimal code point
'  &name;    named entity from the table below (the name is matched case-insensitively;
'            where an entity has two forms, the all-lower-case name selects the first one)
'Anything that is not recognised is left in the text unchanged.
'Scanning continues after each replacement, so a decoded "&" is never decoded a second time.
public Sub HTMLentities(Text As String) As String
   Dim Index As Int = Text.IndexOf("&"), Index2 As Int, tempstr As String, tempstr2 As String, Value As Int = -1, IsLcased As Boolean
   Do While Index > -1
       Index2 = Text.IndexOf2(";", Index)
       If Index2 = -1 Then
           'no terminator left anywhere after this "&": nothing more can be decoded
           Index = -1
       Else
           'tempstr = everything between "&" and ";"
           tempstr = Mid(Text, Index + 1, Index2 - Index - 1)
           Value = -1
           If tempstr.ToLowerCase.StartsWith("#x") Then'&#xhhhh; where hhhh = hexadecimal number
               'This test has to come before the plain "#" test, which would also match "#x..."
               tempstr2 = Right(tempstr, tempstr.Length-2)
               'Accept 1 to 7 hex digits only: an empty value cannot be parsed and 7 digits always fit in an Int
               If tempstr2.Length > 0 And tempstr2.Length <= 7 Then
                   If isHex(tempstr2) Then Value = ToDecimal(tempstr2)
               End If
           Else If tempstr.StartsWith("#") Then'&#nnnn; where nnnn = decimal number
               tempstr2 = Right(tempstr, tempstr.Length-1)
               'Digits only (no sign, dot or exponent); 9 digits always fit in an Int
               If Regex.IsMatch("[0-9]{1,9}", tempstr2) Then Value = tempstr2
           End If
           'Values beyond the last Unicode code point are not characters: leave the text untouched
           If Value > 0x10FFFF Then Value = -1
           If Value = -1 Then '&name; where name = name of entity
               tempstr2 = ""
               IsLcased = IsLcase(tempstr)
               Select Case tempstr.ToLowerCase
                   Case "shy", "zwnj", "zwj", "lrm", "rlm": tempstr2 = ""'these are not handled properly (left undecoded)

                   Case "quot":        tempstr2 = """"
                   Case "amp":          tempstr2 = "&"
                   Case "nbsp":       tempstr2 = " "
                   Case "apos":       tempstr2 = "'"
                   Case "lt":           tempstr2 = "<"
                   Case "gt":           tempstr2 = ">"
                   Case "exclamation":   tempstr2 = "!"
                   Case "percent":       tempstr2 = "%"
                   Case "add":           tempstr2 = "+"
                   Case "equal":       tempstr2 = "="
                   Case "iexcl":       tempstr2 = "¡"
                   Case "cent":       tempstr2 = "¢"
                   Case "pound":       tempstr2 = "£"
                   Case "curren":       tempstr2 = "¤"
                   Case "yen":           tempstr2 = "¥"
                   Case "brvbar":       tempstr2 = "¦"
                   Case "sect":          tempstr2 = "§"
                   Case "uml":          tempstr2 = "¨"
                   Case "copy":          tempstr2 = "©"
                   Case "ordf":          tempstr2 = "ª"
                   Case "laquo":          tempstr2 = "«"
                   Case "not":          tempstr2 = "¬"
                   Case "reg":          tempstr2 = "®"
                   Case "macr":          tempstr2 = "¯"
                   Case "deg":          tempstr2 = "°"
                   Case "plusmn":      tempstr2 = "±"
                   Case "sup2":          tempstr2 = "²"
                   Case "sup3":          tempstr2 = "³"
                   Case "acute":          tempstr2 = "´"
                   Case "micro":          tempstr2 = "µ"
                   Case "para":          tempstr2 = "¶"
                   Case "middot":      tempstr2 = "·"
                   Case "cedil":          tempstr2 = "¸"
                   Case "sup1":          tempstr2 = "¹"
                   Case "ordm":          tempstr2 = "º"
                   Case "raquo":          tempstr2 = "»"
                   Case "frac14":      tempstr2 = "¼"
                   Case "frac12":      tempstr2 = "½"
                   Case "frac34":      tempstr2 = "¾"
                   Case "iquest":      tempstr2 = "¿"
                   Case "times":          tempstr2 = "×"
                   Case "szlig":          tempstr2 = "ß"
                   Case "fnof":          tempstr2 = "ƒ"
                   Case "circ":          tempstr2 = "ˆ"
                   Case "tilde":          tempstr2 = "˜"
                   Case "divide":      tempstr2 = "÷"

                   'Entities with a lower case and an upper case form: IsLcased picks the lower case one
                   Case "agrave":      tempstr2 = IIF(IsLcased, "à", "À")
                   Case "aacute":      tempstr2 = IIF(IsLcased, "á", "Á")
                   Case "acirc":          tempstr2 = IIF(IsLcased, "â", "Â")
                   Case "atilde":      tempstr2 = IIF(IsLcased, "ã", "Ã")
                   Case "auml":          tempstr2 = IIF(IsLcased, "ä", "Ä")
                   Case "aring":          tempstr2 = IIF(IsLcased, "å", "Å")
                   Case "aelig":          tempstr2 = IIF(IsLcased, "æ", "Æ")
                   Case "ccedil":      tempstr2 = IIF(IsLcased, "ç", "Ç")
                   Case "egrave":      tempstr2 = IIF(IsLcased, "è", "È")
                   Case "eacute":      tempstr2 = IIF(IsLcased, "é", "É")
                   Case "ecirc":          tempstr2 = IIF(IsLcased, "ê", "Ê")
                   Case "euml":        tempstr2 = IIF(IsLcased, "ë", "Ë")
                   Case "igrave":      tempstr2 = IIF(IsLcased, "ì", "Ì")
                   Case "iacute":      tempstr2 = IIF(IsLcased, "í", "Í")
                   Case "icirc":          tempstr2 = IIF(IsLcased, "î", "Î")
                   Case "iuml":          tempstr2 = IIF(IsLcased, "ï", "Ï")
                   Case "eth":          tempstr2 = IIF(IsLcased, "ð", "Ð")
                   Case "ntilde":      tempstr2 = IIF(IsLcased, "ñ", "Ñ")
                   Case "ograve":      tempstr2 = IIF(IsLcased, "ò", "Ò")
                   Case "oacute":      tempstr2 = IIF(IsLcased, "ó", "Ó")
                   Case "ocirc":          tempstr2 = IIF(IsLcased, "ô", "Ô")
                   Case "otilde":      tempstr2 = IIF(IsLcased, "õ", "Õ")
                   Case "ouml":          tempstr2 = IIF(IsLcased, "ö", "Ö")
                   Case "oslash":      tempstr2 = IIF(IsLcased, "ø", "Ø")
                   Case "ugrave":      tempstr2 = IIF(IsLcased, "ù", "Ù")
                   Case "uacute":      tempstr2 = IIF(IsLcased, "ú", "Ú")
                   Case "ucirc":          tempstr2 = IIF(IsLcased, "û", "Û")
                   Case "uuml":          tempstr2 = IIF(IsLcased, "ü", "Ü")
                   Case "yacute":      tempstr2 = IIF(IsLcased, "ý", "Ý")
                   Case "thorn":          tempstr2 = IIF(IsLcased, "þ", "Þ")
                   Case "yuml":          tempstr2 = IIF(IsLcased, "ÿ", "Ÿ")
                   Case "oelig":          tempstr2 = IIF(IsLcased, "œ", "Œ")
                   Case "scaron":      tempstr2 = IIF(IsLcased, "š", "Š")
                   Case "dagger":      tempstr2 = IIF(IsLcased, "†", "‡")

                   Case "alpha":          tempstr2 = IIF(IsLcased, "α", "Α")
                   Case "beta":          tempstr2 = IIF(IsLcased, "β", "Β")
                   Case "gamma":          tempstr2 = IIF(IsLcased, "γ", "Γ")
                   Case "delta":          tempstr2 = IIF(IsLcased, "δ", "Δ")
                   Case "epsilon":      tempstr2 = IIF(IsLcased, "ε", "Ε")
                   Case "zeta":          tempstr2 = IIF(IsLcased, "ζ", "Ζ")
                   Case "eta":          tempstr2 = IIF(IsLcased, "η", "Η")
                   Case "theta":          tempstr2 = IIF(IsLcased, "θ", "Θ")
                   Case "iota":          tempstr2 = IIF(IsLcased, "ι", "Ι")
                   Case "kappa":          tempstr2 = IIF(IsLcased, "κ", "Κ")
                   Case "lambda":      tempstr2 = IIF(IsLcased, "λ", "Λ")
                   Case "mu":          tempstr2 = IIF(IsLcased, "μ", "Μ")
                   Case "nu":          tempstr2 = IIF(IsLcased, "ν", "Ν")
                   Case "xi":          tempstr2 = IIF(IsLcased, "ξ", "Ξ")
                   Case "omicron":      tempstr2 = IIF(IsLcased, "ο", "Ο")
                   Case "pi":          tempstr2 = IIF(IsLcased, "π", "Π")
                   Case "rho":          tempstr2 = IIF(IsLcased, "ρ", "Ρ")
                   Case "sigma":          tempstr2 = IIF(IsLcased, "σ", "Σ")
                   Case "tau":          tempstr2 = IIF(IsLcased, "τ", "Τ")
                   Case "upsilon":      tempstr2 = IIF(IsLcased, "υ", "Υ")
                   Case "phi":          tempstr2 = IIF(IsLcased, "φ", "Φ")
                   Case "chi":          tempstr2 = IIF(IsLcased, "χ", "Χ")
                   Case "psi":          tempstr2 = IIF(IsLcased, "ψ", "Ψ")
                   Case "omega":          tempstr2 = IIF(IsLcased, "ω", "Ω")
                   Case "prime":          tempstr2 = IIF(IsLcased, "′", "″")

                   Case "sigmaf":        tempstr2 = "ς"
                   Case "thetasym":      tempstr2 = "ϑ"
                   Case "upsih":          tempstr2 = "ϒ"
                   Case "piv":          tempstr2 = "ϖ"
                   Case "ensp":          tempstr2 = " "
                   Case "emsp":          tempstr2 = " "
                   Case "thinsp":      tempstr2 = " "
                   Case "ndash":          tempstr2 = "–"
                   Case "mdash":          tempstr2 = "—"
                   Case "horbar":      tempstr2 = "―"
                   Case "lsquo":          tempstr2 = "‘"
                   Case "rsquo":          tempstr2 = "’"
                   Case "sbquo":          tempstr2 = "‚"
                   Case "ldquo":          tempstr2 = "“"
                   Case "rdquo":          tempstr2 = "”"
                   Case "bdquo":          tempstr2 = "„"
                   Case "bull":          tempstr2 = "•"
                   Case "hellip":      tempstr2 = "…"
                   Case "permil":      tempstr2 = "‰"
                   Case "lsaquo":      tempstr2 = "‹"
                   Case "rsaquo":      tempstr2 = "›"
                   Case "oline":          tempstr2 = "‾"
                   Case "frasl":          tempstr2 = "⁄"
                   Case "euro":          tempstr2 = "€"
                   Case "image":          tempstr2 = "ℑ"
                   Case "weierp":      tempstr2 = "℘"
                   Case "real":          tempstr2 = "ℜ"
                   Case "trade":          tempstr2 = "™"
                   Case "alefsym":      tempstr2 = "ℵ"

                   'Arrows: the all-lower-case name is the single arrow, any other casing the double arrow
                   Case "larr":          tempstr2 = IIF(IsLcased, "←", "⇐")
                   Case "uarr":          tempstr2 = IIF(IsLcased, "↑", "⇑")
                   Case "rarr":          tempstr2 = IIF(IsLcased, "→", "⇒")
                   Case "darr":          tempstr2 = IIF(IsLcased, "↓", "⇓")
                   Case "harr":          tempstr2 = IIF(IsLcased, "↔", "⇔")
                   Case "crarr":          tempstr2 = "↵"

                   Case "forall":      tempstr2 = "∀"
                   Case "part":          tempstr2 = "∂"
                   Case "exist":          tempstr2 = "∃"
                   Case "empty":          tempstr2 = "∅"
                   Case "nabla":          tempstr2 = "∇"
                   Case "isin":          tempstr2 = "∈"
                   Case "notin":          tempstr2 = "∉"
                   Case "ni":          tempstr2 = "∋"
                   Case "prod":        tempstr2 = "∏"
                   Case "sum":          tempstr2 = "∑"
                   Case "minus":          tempstr2 = "-"
                   Case "lowast":      tempstr2 = "∗"
                   Case "radic":          tempstr2 = "√"
                   Case "prop":          tempstr2 = "∝"
                   Case "infin":          tempstr2 = "∞"
                   Case "ang":          tempstr2 = "∠"
                   Case "and":          tempstr2 = "∧"
                   Case "or":          tempstr2 = "∨"
                   Case "cap":          tempstr2 = "∩"
                   Case "cup":          tempstr2 = "∪"
                   Case "int":          tempstr2 = "∫"
                   Case "there4":      tempstr2 = "∴"'the "therefore" sign
                   Case "sim":          tempstr2 = "~"
                   Case "cong":          tempstr2 = "≅"
                   Case "asymp":          tempstr2 = "≈"
                   Case "ne":          tempstr2 = "≠"
                   Case "equiv":        tempstr2 = "≡"
                   Case "le":          tempstr2 = "≤"
                   Case "ge":          tempstr2 = "≥"
                   Case "sub":          tempstr2 = "⊂"
                   Case "sup":          tempstr2 = "⊃"
                   Case "nsub":          tempstr2 = "⊄"
                   Case "sube":          tempstr2 = "⊆"
                   Case "supe":          tempstr2 = "⊇"
                   Case "oplus":          tempstr2 = "⊕"
                   Case "otimes":      tempstr2 = "⊗"
                   Case "perp":          tempstr2 = "⊥"
                   Case "sdot":          tempstr2 = "·"
                   Case "lceil":          tempstr2 = "⌈"
                   Case "rceil":          tempstr2 = "⌉"
                   Case "lfloor":      tempstr2 = "⌊"
                   Case "rfloor":      tempstr2 = "⌋"
                   Case "lang":          tempstr2 = "〈"
                   Case "rang":          tempstr2 = "〉"
                   Case "loz":          tempstr2 = "◊"
                   Case "spades":      tempstr2 = "♠"
                   Case "clubs":          tempstr2 = "♣"
                   Case "hearts":      tempstr2 = "♥"
                   Case "diams":          tempstr2 = "♦"
               End Select
           Else If Value > 0xFFFF Then
               'A code point above U+FFFF does not fit in one Char: emit it as a UTF-16 surrogate pair
               Value = Value - 0x10000
               tempstr2 = Chr(0xD800 + Bit.ShiftRight(Value, 10)) & Chr(0xDC00 + Bit.And(Value, 0x3FF))
           Else
               tempstr2 = Chr(Value)
           End If
           'Replace "&" + tempstr + ";" (tempstr.Length + 2 characters) with the decoded text.
           'An empty tempstr2 means "not recognised": the original text is kept.
           If tempstr2.Length > 0 Then Text = Text.substring2(0,Index) & tempstr2 & Text.SubString(Index+tempstr.Length+2)
           'Resume after the current position so a decoded "&" is not decoded again
           Index = Text.IndexOf2("&", Index+1)
       End If
   Loop
   Return Text
End Sub

'Returns the first Length characters of Text.
'A Length larger than the text returns the whole text; a Length of zero or less
'returns an empty string instead of passing a negative end index to SubString2.
private Sub Left(Text As String, Length As Long)As String
   If Length <= 0 Then Return ""
   If Length>Text.Length Then Length=Text.Length
   Return Text.SubString2(0, Length)
End Sub
'Returns up to Length characters of Text starting at the zero-based index Start.
'Returns an empty string when Length is not positive or Start lies outside the text.
'When fewer than Length characters remain, the rest of the text is returned
'rather than handing SubString2 an end index past the end of the string.
private Sub Mid(Text As String, Start As Int, Length As Int) As String
   If Length <= 0 Or Start < 0 Or Start >= Text.Length Then Return ""
   Dim EndIndex As Int = Start + Length
   If EndIndex > Text.Length Then EndIndex = Text.Length
   Return Text.SubString2(Start, EndIndex)
End Sub
'Returns the last Length characters of Text.
'A Length larger than the text returns the whole text; a Length of zero or less
'returns an empty string instead of computing a start index past the end of the text.
private Sub Right(Text As String, Length As Long) As String
   If Length <= 0 Then Return ""
   If Length>Text.Length Then Length=Text.Length
   Return Text.SubString(Text.Length-Length)
End Sub
'Converts a string of hexadecimal digits to its Int value by parsing it in base 16.
'The input is passed to Bit.ParseInt unchanged; no validation is done here.
public Sub ToDecimal(Hexadecimal As String) As Int
   Dim Radix As Int = 16
   Dim Parsed As Int = Bit.ParseInt(Hexadecimal, Radix)
   Return Parsed
End Sub
 
Upvote 0
Status
Not open for further replies.