html=$htmltoclean; }
    // NOTE(review): the line above is the tail of a definition that begins
    // outside the visible source (presumably a constructor storing the HTML
    // to clean - confirm). It is kept verbatim; the class declaration and its
    // properties ($html, $cleanedHtml, $allowedtags, $lt, $gt) are not visible.

    /**
     * Return the stored HTML with every tag removed except the allowed ones.
     *
     * Allowed tags are first rewritten to placeholder tokens built from
     * $this->lt / $this->gt (defined outside this view), everything that
     * still looks like a tag is removed with strip_tags(), and the
     * placeholders are then turned back into bare "<tag>" / "</tag>".
     * Finally CRLF pairs are removed and spaces between adjacent tags
     * ("> <") are collapsed.
     *
     * The result is also stored in $this->cleanedHtml.
     *
     * @return string the cleaned HTML
     */
    function GetCleanedHtml()
    {
        // Start from empty lists so that an empty allow-list degrades to a
        // plain strip_tags() instead of passing undefined variables around.
        $originaltags  = array();
        $pseudotags    = array();
        $pseudotags2   = array();
        $originaltags2 = array();

        $allowed = is_array($this->allowedtags) ? $this->allowedtags : array();

        foreach ($allowed as $name) {
            $open  = $this->lt . $name . $this->gt;
            $close = $this->lt . "/" . $name . $this->gt;

            // Arrays for phase 1 cleaning. Entries are positional pairs:
            // $originaltags[n] is replaced by $pseudotags[n].
            // "<tag " and "<tag" both become "placeholder + <", so whatever
            // followed the tag name is left behind as a pseudo tag for
            // strip_tags() to remove.
            $originaltags[] = "<" . $name . " ";
            $pseudotags[]   = $open . "<";
            $originaltags[] = "<" . $name;
            $pseudotags[]   = $open . "<";
            $originaltags[] = "</" . $name . ">";
            $pseudotags[]   = $close;

            // Arrays for phase 2 cleaning: placeholders back to real tags.
            $pseudotags2[]   = $open;
            $originaltags2[] = "<" . $name . ">";
            $pseudotags2[]   = $close;
            $originaltags2[] = "</" . $name . ">";
        }

        // NOTE(review): the original also built lists for removing empty
        // pairs such as "<tag></tag>" but never applied them; empty tags are
        // therefore left in the output, as before.

        // Main cleaning utility.
        $html = str_replace($originaltags, $pseudotags, $this->html);
        $html = strip_tags($html);
        $html = str_replace($pseudotags2, $originaltags2, $html);
        $html = preg_replace("/\r\n/", "", $html);
        $html = preg_replace("/> *</", "><", $html);

        $this->cleanedHtml = $html;

        return $this->cleanedHtml;
    }

    /**
     * Replace the list of tags that GetCleanedHtml() keeps.
     *
     * Each entry may be given as "b", "<b>" or "<br/>"; the characters
     * "<", ">" and "/" are removed so only the bare tag name is stored.
     *
     * @param array $tags tag names, with or without angle brackets
     */
    function allowedTags($tags)
    {
        $this->allowedtags = array();

        foreach ($tags as $tag) {
            // Strip "/" on its own: a "/>" search can never match once ">"
            // has already been removed, which left names like "br/".
            $this->allowedtags[] = str_replace(array("<", ">", "/"), "", $tag);
        }
    }

    /**
     * Clean the stored HTML and write the result to a file.
     *
     * The file is opened in "w+" mode, i.e. created or truncated.
     *
     * @param string $fname path of the file to write
     * @return bool true when the content was written, false when the file
     *              could not be opened or written
     */
    function saveToFile($fname)
    {
        $file_content = $this->GetCleanedHtml();

        $fp = fopen($fname, "w+");
        if ($fp === false) {
            // fopen() has already raised a warning; do not pass false on to
            // fwrite()/fclose().
            return false;
        }

        $written = fwrite($fp, $file_content);
        fclose($fp);

        return $written !== false;
    }

    // NOTE(review): text is missing from this file between the "?>" below and
    // "html = $html; }" - the declaration of a second class and the start of
    // what looks like its constructor. Both fragments are kept verbatim; the
    // missing declaration must be restored before the methods that follow
    // can run.
}
?>html = $html; }

    /**
     * Replace the valid tag and/or valid class lists.
     *
     * A list is only replaced when the corresponding argument is truthy;
     * values are stored as given, without sanitising.
     *
     * @param array|null $tags    new list of valid tag names
     * @param array|null $classes new list of valid class names
     */
    function setValidDatas( $tags = NULL, $classes = NULL )
    {
        if ( $tags ) {
            $this->valid_tags = $tags;
        }

        if ( $classes ) {
            $this->valid_classes = $classes;
        }
    }

    /**
     * Replace the valid tag list with sanitised tag names.
     *
     * Quotes, angle brackets, slashes and backslashes are removed from each
     * name before it is stored.
     *
     * @param array $tags tag names
     */
    function setValidTags( $tags = array() )
    {
        $this->valid_tags = array();

        foreach ( $tags as $tag ) {
            $this->valid_tags[] = str_replace( array( "'", '"', '<', '>', '/', '\\' ), '', $tag );
        }
    }

    /**
     * Remove tags from the valid tag list.
     *
     * Names are sanitised the same way as in setValidTags(). Only the first
     * occurrence of each name is removed; the remaining keys are not
     * re-indexed.
     *
     * @param array $tags tag names to remove
     */
    function removeValidTags( $tags = array() )
    {
        foreach ( $tags as $tag ) {
            $tag = str_replace( array( "'", '"', '<', '>', '/', '\\' ), '', $tag );

            $k = array_search( $tag, $this->valid_tags, true );
            if ( $k !== false ) {
                unset( $this->valid_tags[$k] );
            }
        }
    }

    /**
     * Add tags to the valid tag list.
     *
     * Names are sanitised the same way as in setValidTags(); a name that is
     * already in the list is not added again.
     *
     * @param array $tags tag names to add
     */
    function addValidTags( $tags = array() )
    {
        foreach ( $tags as $tag ) {
            $tag = str_replace( array( "'", '"', '<', '>', '/', '\\' ), '', $tag );

            // The original tested in_array() without negation, so it could
            // only ever duplicate existing entries and never add a new one.
            if ( !in_array( $tag, $this->valid_tags, true ) ) {
                $this->valid_tags[] = $tag;
            }
        }
    }

    /**
     * Remove tags that are not in the valid tag list, and class attributes
     * that are not in the valid class list, from $this->html.
     *
     * $this->html_temp is a scratch copy: each opening tag found there is
     * removed from it (together with its closing tag), so the scan always
     * advances to a tag that has not been handled yet.
     *
     * Attributes are split on single spaces, so a class attribute holding
     * several space-separated names is only recognised by its first name.
     *
     * @return string the filtered HTML (also left in $this->html)
     */
    function clearHTML()
    {
        $tag_pattern = '/<[a-zA-Z][*]?[^>]*>/';

        $this->html_temp = $this->html;

        while ( preg_match( $tag_pattern, $this->html_temp, $tag ) ) {
            $start_tag = $tag[0];

            $tag_temp  = str_replace( array( "<", "/>", ">" ), "", $start_tag );
            $tag_infos = explode( " ", $tag_temp );
            $tag_name  = $tag_infos[0];

            // A self-closing tag has no separate closing tag to remove.
            $to_remove = array( $start_tag );
            if ( strpos( $start_tag, "/>" ) === false ) {
                $to_remove[] = "</$tag_name>";
            }

            $this->html_temp = str_replace( $to_remove, "", $this->html_temp );

            if ( !in_array( $tag_name, $this->valid_tags ) ) {
                // Unknown tag: drop the opening and closing tags, keep the content.
                $this->html = str_replace( $to_remove, "", $this->html );
                continue;
            }

            // Known tag: drop class attributes whose value is not allowed.
            $full_tag_infos = explode( " ", $start_tag );
            $terminator     = "";
            $changed        = false;

            foreach ( $full_tag_infos as $k => $v ) {
                if ( strpos( $v, "class=" ) === false ) {
                    continue;
                }

                // When the class attribute is the last token it carries the
                // tag terminator (">" or "/>"). Split it off so that it is
                // neither compared as part of the class name nor lost when
                // the attribute is removed.
                $ending = "";
                if ( substr( $v, -2 ) === "/>" ) {
                    $ending = "/>";
                } elseif ( substr( $v, -1 ) === ">" ) {
                    $ending = ">";
                }
                if ( $ending !== "" ) {
                    $v = substr( $v, 0, -strlen( $ending ) );
                }

                $class = explode( "=", $v );
                $class = str_replace( array( '"', "'" ), "", $class[1] );

                if ( !in_array( $class, $this->valid_classes ) ) {
                    unset( $full_tag_infos[$k] );
                    $changed = true;
                    if ( $ending !== "" ) {
                        $terminator = $ending;
                    }
                }
            }

            if ( $changed ) {
                $rebuilt    = implode( " ", $full_tag_infos ) . $terminator;
                $this->html = str_replace( $start_tag, $rebuilt, $this->html );
            }
        }

        return $this->html;
    }

    /**
     * Scan HTML for tags that are never closed.
     *
     * Only the detection step exists: the names of unclosed tags are appended
     * to $this->error by getHTMLTree(). No repair is performed and the method
     * always returns an empty string. $tag_arr is not used.
     *
     * @param string $html_temp HTML to check; $this->html is used when empty
     * @param array  $tag_arr   unused
     * @return string always ""
     */
    function fixHTML( $html_temp = '', $tag_arr = array() )
    {
        $this->html_temp = $html_temp ? $html_temp : $this->html;

        $this->getHTMLTree();

        // NOTE(review): the original had an empty "if ( count( $this->error ) )"
        // branch here - the actual fixing was never written.
        return "";
    }

    /**
     * Consume tags from $this->html_temp and record unclosed ones.
     *
     * Works on $this->html_temp directly and recurses after every opening
     * tag; both parameters are accepted but not used. When the text runs out
     * after an opening tag without a closing tag having been reported, the
     * tag name is appended to $this->error. The name of the closing tag is
     * not compared with the opening one.
     *
     * @param string $html_temp unused
     * @param array  $tag_arr   unused
     * @return string|null "end_<name>" when a closing tag was consumed, ""
     *                     when a self-closing tag was consumed, null when
     *                     the text was exhausted
     */
    function getHTMLTree( $html_temp = '', $tag_arr = array() )
    {
        // "#" delimiters: the character class contains "/", which ended the
        // original "/"-delimited pattern early and made it fail to compile,
        // so this loop never ran.
        $found = preg_match( '#<[a-zA-Z/][*]?[^>]*>#', $this->html_temp, $tag );

        while ( $found ) {
            $tagname = explode( " ", str_replace( array( "<", ">", "/" ), "", $tag[0] ) );
            $tagname = $tagname[0];

            // Drop everything up to and including the tag just read.
            $pos = strpos( $this->html_temp, $tag[0] );
            $this->html_temp = substr( $this->html_temp, $pos + strlen( $tag[0] ) );

            // A closing tag starts with "</". Looking for "/name" anywhere in
            // the tag also matched attribute values such as href="/a".
            if ( strncmp( $tag[0], "</", 2 ) === 0 ) {
                return "end_$tagname";
            }

            if ( substr( $tag[0], -2 ) === "/>" ) {
                return "";
            }

            $ret = $this->getHTMLTree( $this->html_temp );

            // Continue with the next opening tag, if any is left.
            $found = preg_match( '/<[a-zA-Z][*]?[^>]*>/', $this->html_temp, $tag );

            if ( !$found AND strpos( (string) $ret, 'end_' ) === false ) {
                $this->error[] = $tagname;
            }
        }

        return null;
    }
}
?>