DOMDocument XPath

mafiabo

New Member
Can somebody show me some examples of how to import an HTML page and use XPath to find the keywords, including the rest of the text from the surrounding div, p, title, etc.? Thank you! EDIT: In this case I use my web crawler as an example. I have a form that takes the website to be crawled and the keywords which have to be found in the pages of that website. \[code\]http://crawler.tmp.remote.nl/example.php\[/code\] Right now it scans for web pages that contain the keywords. My problem: I need the div area around the found keywords. \[code\]class MyCrawler extends PHPCrawler { function handlePageData(&$page_data) { // CHECK DOMEIN $domain = $_POST['domain']; $keywords = $_POST['keywords']; //$tags = get_meta_tags($page_data["url"]); //$iKeyFound = null;$find = $keywords;$str = file_get_contents($page_data["url"]);if(strpos($str, $find) == true){ echo $page_data["referer_url"]. ' - gevonden'; $keywords = $_POST['keywords']; if($page_data["header"]){ echo "<table border='1' >"; echo "<tr><td width='300'>Status:</td><td width='500'> ".strtok($page_data["header"], "\n")."</td></tr>";} else "<table border='1' >"; // PRINT EERSTE LIJN echo "<tr><td>Page requested:</td><td> ".$page_data["url"]."</td></tr>"; // PRINT STATUS WEBSITE // PRINT WEBPAGINA echo "<tr><td>Referer-page:</td><td> ".$page_data["referer_url"]."</td></tr>"; // CONTENT ONTVANGEN? if ($page_data["received"]==true) echo "<tr><td>Content received: </td><td>".$page_data["bytes_received"] / 8 . " Kbytes</td></tr></table>"; else echo "<tr><td>Content:</td><td> Not received</td></tr></table>"; $domain = $_POST['domain']; $link = mysql_connect('localhost', 'crawler', '--'); if (!$link) { die('Could not connect: ' . 
mysql_error()); } mysql_select_db("crawler"); if(empty($page_data["referer_url"])) $page_data["referer_url"] = $page_data["url"]; strip_tags($str, '<p><b>'); $matches = $keywords; //$match = preg_match_all("'/<(*.?)(*.?)>(*.?)'".$keywords."'(*.?)<\/($1)>/'", $str, $matches, PREG_SET_ORDER); //echo $match; mysql_query("INSERT INTO crawler (id, domain, url, keywords, data) VALUES ('', '".$page_data["referer_url"]."', '".$page_data["url"]."', '".$keywords."', '".mysql_real_escape_string($str) . "' )"); echo '<br>'; echo "<br><br>"; echo str_pad(" ", 5000); // "Force flush", workaround flush();}\[/code\]
 
Back
Top