| 1 | <?php |
|---|
| 2 | require_once("MTUtil.php"); |
|---|
/**
 * Removes every tag and attribute that is not explicitly allowed from an
 * HTML fragment.
 *
 * Processing instructions, comments and ASP-style blocks (<? ?>, <!-- -->,
 * <% %>) are dropped entirely. Tags that are not whitelisted are dropped while
 * the text around them is kept. Attributes are kept only when whitelisted for
 * the tag itself or for the wildcard tag '*'. Allowed tags that are still open
 * at the end of the input are closed.
 *
 * @param string       $s   Markup to clean.
 * @param string|array $arg Either a spec string understood by
 *                          sanitize_parse_spec(), or an already parsed spec:
 *                          array('ok' => ..., 'tag_attr' => ...).
 * @return string The cleaned markup.
 */
function sanitize($s, $arg) {
    if (($arg) && (!is_array($arg)))
        $arg = sanitize_parse_spec($arg);
    # A missing or malformed spec allows nothing; reading the keys defensively
    # avoids undefined-offset warnings when $arg is empty.
    $ok_tags = (is_array($arg) && isset($arg['ok'])) ? $arg['ok'] : array();
    $tag_attr = (is_array($arg) && isset($arg['tag_attr'])) ? $arg['tag_attr'] : array();
    $s = preg_replace('/\x00/', '', $s);
    $closings = array('<'.'?' => '?'.'>', '<!--' => '-->', '<%' => '%>');
    $tokens = preg_split('/(<(?:!--|%|\?)|<\/\w*|<\w*|(?:-->|%>|\?'.'>|>))/', $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    $open_tag_a = array();  # stack of the allowed tags that are currently open
    $open_tag_h = array();  # tag name => how many times it sits on that stack

    $toknum = 0;
    $result = '';
    $total = count($tokens);
    while ($toknum < $total) {
        $token = $tokens[$toknum];
        if (isset($closings[$token])) {
            # Skip the whole <? ?>, <!-- --> or <% %> block without output.
            $toknum = sanitize_tokens_up_to($tokens, $toknum, $closings[$token]);
        } elseif (substr($token, 0, 1) == '<') {
            # $closure: 0 = opening tag, 1 = closing tag, 2 = self-closing tag.
            $closure = 0;
            $name = strtolower(substr($token, 1));
            $start = $toknum;
            $end = sanitize_tokens_up_to($tokens, $start, '>');
            $toknum = $end;
            if (substr($name, 0, 1) == '/') {
                $name = substr($name, 1);
                $closure = 1;
            }
            if (isset($ok_tags[$name])) {
                if (isset($tag_attr[$name]) && $tag_attr[$name] == '/')
                    $closure = 2;

                # process attribute list...
                $inside = sanitize_output_tokens($tokens, $start + 1, $end - 1);
                if (preg_match('!/>$!', $inside))
                    $closure = 2;
                $inside = preg_replace('!/?>$!', '', $inside);
                $attrs = '';
                if (preg_match_all('/\s*(\w+)\s*=(?:([\'"])(.*?)\2|([^\s]+))\s*/s', $inside, $matches, PREG_SET_ORDER)) {
                    foreach ($matches as $match) {
                        $attr = strtolower($match[1]);
                        if (isset($match[4])) {
                            # Unquoted value: it is re-emitted inside double
                            # quotes, so embedded double quotes must be escaped
                            # or the value could break out of the attribute.
                            $value = $match[4];
                            $value = '"' . preg_replace('/"/', '&quot;', $value) . '"';
                            $dec_val = decode_html($match[4]);
                        } else {
                            $value = $match[2] . $match[3] . $match[2];
                            $dec_val = decode_html($match[3]);
                        }
                        if (isset($ok_tags[$name][$attr]) ||
                            isset($ok_tags['*'][$attr])) {
                            $safe = 1;
                            if (preg_match('/^(src|href|dynsrc)$/', $attr)) {
                                # Treat a numerically encoded colon (decimal 58,
                                # hex 3A) as a literal ':' before looking for a
                                # protocol prefix.
                                $dec_val = preg_replace('/&#0*58(?:=;|[^0-9])/', ':', $dec_val);
                                $dec_val = preg_replace('/&#x0*3[Aa](?:=;|[^a-fA-F0-9])/', ':', $dec_val);
                                if (preg_match('/^([\s\S]+?):/', $dec_val, $proto_match)) {
                                    $proto = $proto_match[1];
                                    if (preg_match('/[\r\n\t]/', $proto)) {
                                        $safe = 0;
                                    } else {
                                        # Reject anything before the first ':'
                                        # that is not letters/digits/'+', and
                                        # any scheme that ends in "script".
                                        $proto = preg_replace('/\s+/s', '', $proto);
                                        if (preg_match('/[^a-zA-Z0-9\\+]/', $proto))
                                            $safe = 0;
                                        elseif (preg_match('/script$/i', $proto))
                                            $safe = 0;
                                    }
                                }
                            }
                            if ($safe)
                                $attrs .= ' ' . $attr . '=' . $value;
                        }
                    }
                }

                $was_open = false;
                if ($closure == 1) {
                    # Only a tag that is really on the stack may close the tags
                    # nested inside it; a counter that has dropped back to 0
                    # must not count as "open".
                    $was_open = !empty($open_tag_h[$name]);
                    if ($was_open)
                        $result .= sanitize_expel_up_to($open_tag_a, $open_tag_h, $name);
                } elseif (!$closure) {
                    $open_tag_a[] = $name;
                    $open_tag_h[$name] = isset($open_tag_h[$name]) ? $open_tag_h[$name] + 1 : 1;
                }
                $result .= '<' .
                    ($closure == 1 ? '/' : '') .
                    $name .
                    $attrs .
                    ($closure == 2 ? ' /' : '') . '>';
                # sanitize_expel_up_to() pops the matching entry off the stack
                # without touching its counter, so settle it here; an unmatched
                # closing tag leaves the counters alone.
                if ($closure == 1 && $was_open)
                    $open_tag_h[$name]--;
            }
        } else {
            if (strlen($token) > 0)
                $result .= $token;
            $toknum++;
        }
    }
    # Close whatever allowed tags are still open.
    $result .= sanitize_expel_up_to($open_tag_a, $open_tag_h, null);
    return $result;
}
|---|
| 101 | |
|---|
/**
 * Parses a whitelist spec string into the structure used by sanitize().
 *
 * The spec is a comma-separated list of rules. Each rule is a tag name
 * optionally followed by whitespace-separated attribute names; a tag name
 * ending in '/' marks the tag as self-closing. Everything is lower-cased.
 * Whitespace around rules and empty rules (e.g. from a trailing comma) are
 * ignored so they cannot create bogus tag or attribute names.
 *
 * @param string $a Spec string, e.g. "a href title, b, br/".
 * @return array array('ok' => tag => (array of attr => 1, or 1 when the rule
 *               lists no attributes), 'tag_attr' => tag => '/' for
 *               self-closing tags).
 */
function sanitize_parse_spec($a) {
    $ok_tags = array();
    $tag_attr = array();
    $rules = preg_split('/\s*,\s*/', (string) $a, -1, PREG_SPLIT_NO_EMPTY);
    foreach ($rules as $rule) {
        # First word is the tag, the remaining words are its attributes.
        $words = preg_split('/\s+/', strtolower($rule), -1, PREG_SPLIT_NO_EMPTY);
        if (!count($words))
            continue;
        $tag = array_shift($words);
        $style = '';
        if (substr($tag, -1) === '/') {
            $tag = substr($tag, 0, -1);
            $style = '/';
        }
        if ($tag === '')
            continue;
        $ok_attr = array();
        foreach ($words as $attr) {
            $ok_attr[$attr] = 1;
        }
        if ($style) $tag_attr[$tag] = $style;
        $ok_tags[$tag] = count($ok_attr) ? $ok_attr : 1;
    }
    return array('ok' => $ok_tags, 'tag_attr' => $tag_attr);
}
|---|
| 132 | |
|---|
/**
 * Pops open tags off the stack and returns the closing tags for them.
 *
 * Tags are popped from the top of $open_tag_a until the top entry equals
 * $stop_tag (or until the stack is empty when $stop_tag is empty). Each popped
 * tag has its counter in $open_tag_h decremented. If anything is left on the
 * stack afterwards, the top entry is removed as well, without output and
 * without changing its counter.
 *
 * @param array       $open_tag_a Stack of open tag names (modified in place).
 * @param array       $open_tag_h Tag name => counter (modified in place).
 * @param string|null $stop_tag   Tag to stop at; empty means "close all".
 * @return string Concatenated closing tags for the popped entries.
 */
function sanitize_expel_up_to(&$open_tag_a, &$open_tag_h, $stop_tag) {
    $closers = array();
    $close_all = empty($stop_tag);
    for ($depth = count($open_tag_a); $depth > 0; $depth--) {
        if (!$close_all && $open_tag_a[$depth - 1] == $stop_tag)
            break;
        $tag = array_pop($open_tag_a);
        $open_tag_h[$tag]--;
        $closers[] = '</' . $tag . '>';
    }
    # Drop the stop tag itself; the caller emits its closing tag.
    if (!empty($open_tag_a))
        array_pop($open_tag_a);
    return implode('', $closers);
}
|---|
| 145 | |
|---|
/**
 * Finds where the construct that starts at token $i ends.
 *
 * Scans $tokens from index $i for the first token equal to $closure.
 *
 * @param array  $tokens  Token list.
 * @param int    $i       Index to start scanning at.
 * @param string $closure Token that terminates the construct.
 * @return int Index just past the matching token, the token count when no
 *             match is found, or $i unchanged when it is already past the end.
 */
function sanitize_tokens_up_to($tokens, $i, $closure) {
    $total = count($tokens);
    for ($pos = $i; $pos < $total; $pos++) {
        if ($tokens[$pos] == $closure)
            return $pos + 1;
    }
    return $pos;
}
|---|
| 153 | |
|---|
/**
 * Joins a contiguous run of tokens back into a single string.
 *
 * @param array $tokens Token list.
 * @param int   $start  Index of the first token to include.
 * @param int   $end    Index of the last token to include (inclusive).
 * @return string The concatenated tokens; empty when $start is past $end.
 */
function sanitize_output_tokens($tokens, $start, $end) {
    $pieces = array();
    $idx = $start;
    while ($idx <= $end) {
        $pieces[] = $tokens[$idx];
        $idx++;
    }
    return implode('', $pieces);
}
|---|
| 160 | ?> |
|---|