url ->name ->description ->topic [category] ->url ->name ->content [topic] -> user -> content .... .... ..... */

// Connection settings: forum base URL and the credentials used to log in.
$url = "http://";
$login = "";
$password = "";

/**
 * Fetch a URL with cURL, sharing cookies between calls through cookie.txt.
 *
 * A non-empty $fields array is sent as a URL-encoded POST body; an empty
 * array results in a plain GET. Redirects are followed.
 *
 * @param string $url    Address to request.
 * @param array  $fields Form fields (name => value) to POST.
 * @return string|false  Response body, or false when curl_exec() fails.
 */
function file_get_content($url, $fields)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    if ($fields) {
        // http_build_query() URL-encodes names and values and joins them
        // without a trailing '&'. The former hand-built string sent raw
        // values and threw away the result of rtrim().
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields, '', '&'));
    }
    // NOTE(review): peer certificate verification is switched off here;
    // confirm this is acceptable for the target forum before reuse.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_COOKIEFILE, "cookie.txt");
    curl_setopt($ch, CURLOPT_COOKIEJAR, "cookie.txt");
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    $result = curl_exec($ch);
    curl_close($ch);
    return $result;
}

/**
 * Replace accented letters by their base letter and drop other entities.
 *
 * The text is first converted to HTML entities; accent and ligature
 * entities are then reduced to their leading letter(s), every remaining
 * entity is removed, and finally anything running from a '#' to the next
 * ';' is removed as well.
 *
 * @param string $str     Text to clean.
 * @param string $charset Character set handed to htmlentities().
 * @return string
 */
function wd_remove_accents($str, $charset='utf-8')
{
    $str = htmlentities($str, ENT_NOQUOTES, $charset);
    // [A-Za-z]: the former [A-za-z] range also matched [ \ ] ^ _ and `.
    $str = preg_replace('#&([A-Za-z])(?:acute|cedil|caron|circ|grave|orn|ring|slash|th|tilde|uml);#', '\1', $str);
    $str = preg_replace('#&([A-Za-z]{2})(?:lig);#', '\1', $str);
    $str = preg_replace('#&[^;]+;#', '', $str);
    $str = preg_replace('/#(.*?);/i', '', $str);
    return $str;
}

/**
 * Strip HTML tags, remove the characters matched by the str_replace()
 * search string, remove accents and trim the result.
 *
 * NOTE(review): as written the search string is a single space, so every
 * space is removed; it may originally have been "&nbsp;" - confirm.
 *
 * @param string $content Raw HTML fragment.
 * @return string
 */
function strip_tags2($content)
{
    $content = str_replace(" ", "", strip_tags($content));
    return trim(wd_remove_accents($content));
}

/**
 * Log in to the forum by posting the credentials to "<url>/login".
 * The session cookie is kept in cookie.txt by file_get_content().
 *
 * @param string $url      Forum base URL.
 * @param string $login    User name.
 * @param string $password Password.
 * @return string|false    Body of the login response (false on failure).
 */
function ConnexionForum($url, $login, $password)
{
    return file_get_content($url.'/login', array(
        "username"  => $login,
        "password"  => $password,
        "autologin" => "on",
        "redirect"  => "",
        "query"     => "",
        "login"     => "Connexion",
    ));
}

// Export the member list.
// NOTE(review): this copy of the script looks damaged from here down to the
// end of GetContentTopic: the HTML parts of the regular expressions are
// missing, and the end of ExportUser together with the header of
// GetContentTopic (called below as GetContentTopic($contents2, $idForum,
// $count)) are not present. The span is kept as found; it does not parse
// until it is restored from the original script.
function ExportUser($url){
    $start = 0;
    $members=array();
    while($start <= 3000){
        $contents = file_get_content($url."/memberlist?mode=lastvisit&order=DESC&username&start=".$start, array());
        $test = preg_match_all('/(.*?)<\/a>/s', $contents, $userlist);
        unset($userlist[0]);
        foreach($userlist[1] as $k => $v){
            $userlist[2][$k] = strip_tags2($userlist[2][$k]);
            $usercontent = file_get_content($url."/".$v, array());
            $ok = preg_match_all('/(.*?)<\/a>(.*?)<\/strong>(.*?)'. '
(.*?)<\/table><\/td><\/tr>/s';
    // NOTE(review): the statements below belong to GetContentTopic, whose
    // header and $template assignment are missing from this copy.
    $contentTopic = array();
    if (preg_match_all($template, $contents2, $topic3)){
        $count2 = 0;
        while($count2 <= 4) unset($topic3[$count2++]);
        $count2 = 0;
        while (isset($topic3[5][$count2])){
            $contentTopic[$count2]['user'] = strip_tags($topic3[5][$count2]);
            $contentTopic[$count2]['content'] = strip_tags($topic3[7][$count2]);
            $contentTopic[$count2]['parentForumId'] = $idForum;
            $contentTopic[$count2]['TopicId'] = $idTopic;
            $contentTopic[$count2]['MessageId'] = $count2;
            echo "Extraction Message de ".$contentTopic[$count2]['user']." \n";
            $count2++;
        }
    }
    return $contentTopic;
}

/**
 * Extract the topics listed on a forum page and download each of them.
 *
 * NOTE(review): the pattern below has three capture groups while the code
 * reads groups 3 and 4; the pattern appears to have lost its HTML parts -
 * restore it from the original script.
 *
 * @param string $url      Forum base URL, prefixed to every topic link.
 * @param string $contents HTML of the forum page.
 * @param int    $idForum  Identifier of the forum that owns the topics.
 * @return array List of topics, each with 'name', 'parentForumId',
 *               'TopicId', 'url' and 'content' (the extracted messages).
 */
function ExtractListeTopic($url, $contents,$idForum)
{
    $template = '/(.*?)(.*?)<\/a>(.*?)<\/td>/s';
    $listtopic = array();
    if (preg_match_all($template, $contents, $topic2)) {
        $count = 0;
        while (isset($topic2[3][$count])) {
            // Guarded so a missing name group yields "" instead of a warning.
            $listtopic[$count]['name'] = isset($topic2[4][$count]) ? $topic2[4][$count] : "";
            $listtopic[$count]['parentForumId'] = $idForum;
            $listtopic[$count]['TopicId'] = $count;
            $listtopic[$count]['url'] = $topic2[3][$count];
            echo "========> Extraction Topic ".$listtopic[$count]['name']."\n";
            $contents2 = file_get_content($url.$listtopic[$count]['url'], array());
            $listtopic[$count]['content'] = GetContentTopic($contents2,$idForum,$count);
            ++$count;
            // Pause between topic downloads.
            sleep(1);
        }
    }
    return $listtopic;
}

/**
 * Extract the forums listed on a page, then crawl each one for its
 * sub-forums and topics.
 *
 * NOTE(review): the pattern below has three capture groups while the code
 * reads groups 3 to 5; the pattern appears to have lost its HTML parts -
 * restore it from the original script.
 *
 * @param string $url      Forum base URL, prefixed to every forum link.
 * @param string $contents HTML of the page listing the forums.
 * @param array  $visited  Forum links already crawled; shared by reference
 *                         across the recursion so that each link is
 *                         fetched at most once. Callers may omit it.
 * @return array List of forums, each with 'url', 'name', 'description',
 *               'forumId', 'sousForum' and 'topic'.
 */
function ExtractForum($url, $contents, &$visited = array())
{
    $template = '/(.*?)(.*?)<\/a>(.*?)<\/td>/s';
    preg_match_all($template, $contents, $topic);
    $export = array();
    $i = 0;
    while (isset($topic[3][$i])) {
        $forumLink = trim($topic[3][$i]);
        $export[$i]['url'] = $topic[3][$i];
        $export[$i]['name'] = isset($topic[4][$i]) ? $topic[4][$i] : "";
        $export[$i]['description'] = isset($topic[5][$i]) ? strip_tags($topic[5][$i]) : "";
        $export[$i]['forumId'] = $i;
        echo "Extraction Forum \n".$export[$i]['name']."\n";
        if (in_array($forumLink, $visited, true)) {
            // Already crawled elsewhere in the tree: do not fetch it again.
            $export[$i]['sousForum'] = array();
            $export[$i]["topic"] = array();
        } else {
            $visited[] = $forumLink;
            $page = file_get_content($url.$forumLink, array());
            // Recurse with the base URL: passing the relative forum link,
            // as before, made every nested request lack a host.
            $export[$i]['sousForum'] = ExtractForum($url, $page, $visited);
            $export[$i]["topic"] = ExtractListeTopic($url, $page, $i);
        }
        // Advance to the next match; without this the loop never ended.
        ++$i;
    }
    return $export;
}

// Main code: log in, crawl the forums from the index page, then the members.
// NOTE(review): the values returned by ExtractForum and ExportUser are not
// stored or written anywhere in the code visible here.
ConnexionForum($url, $login, $password);
$contents = file_get_content($url, array());
ExtractForum($url, $contents);
ExportUser($url);