<?php
/* Use, modification, and distribution are subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1.0.txt or copy at
www.boost.org/LICENSE_1.0.txt) */

//This php function implements a search heuristic that
//gives results a weight based on tags around search terms, 
//whether a page is in the carleton domain or not, and if it
//is within the carleton domain, how deep in the tree it is.

//It returns an array of (urlid, weight) pairs in weight order to search.php

//author: Mike Ottum

// Revision number of this ranking heuristic.
// v3 changes: now only returns pages containing all of the search terms
$version = 3.41;

// NOTE(review): $hername is not referenced by rankPages() below; its consumer
// is presumably another script that includes this file -- confirm.
$hername = 'tags';

/**
 * Rank the pages that contain every search term.
 *
 * For each term, every matching (url, tag) row is read from the word /
 * word_to_url / url tables and a per-URL weight is accumulated:
 *   - +1 for every matching row;
 *   - +5 when the row's tag contains "h1", +3 for "b", +3 for "u"
 *     (plain substring tests on the tag column);
 *   - +10 when the term occurs in the URL itself;
 *   - once per URL, when "carleton" appears in the host part:
 *     200 - 5 * (number of '/' in the URL - 2), so deeper pages get less;
 *   - +500 on each title row once every term has been seen in the title
 *     (only for queries with more than one term).
 * Only URLs in which all terms were found are returned.
 *
 * Requires an open connection of the legacy mysql extension; the script is
 * terminated with die() if the database cannot be selected or a query fails.
 *
 * @param array  $qlist         Array of arrays of search terms (walked two levels deep).
 * @param mixed  $escaped_qlist Not used by this function.
 * @param string $database      Name of the MySQL database to select.
 * @param mixed  $debug         Not used by this function.
 * @return array List of array(urlid, weight) pairs in the order produced by
 *               normalizeWeights() (applied after a descending sort by weight);
 *               an empty array when no page contains all terms.
 */
function rankPages($qlist, $escaped_qlist, $database, $debug) {
  $title_bonus = 50;
  $header_bonus = 5;
  $bold_bonus = 3;
  $underline_bonus = 3;
  $carleton_bonus = 200;
  $url_bonus = 10;
  // NOTE(review): there is no italic bonus; a substring test for "i" would also
  // match "title", which may be why it was never wired in -- confirm.

  mysql_select_db( $database ) or die( "Couldn't select database $database" );

  // Flatten the two-level query into one ordered list so each term gets a
  // stable index (0..n-1) used for the "seen" bookkeeping below.
  $terms = array();
  foreach ($qlist as $group) {
    foreach ($group as $term) {
      $terms[] = $term;
    }
  }
  $num_terms = count($terms);

  $terms_in_doc = array();    // urlid => set of term indices found anywhere in the page
  $terms_in_title = array();  // urlid => set of term indices found in the title
  $temp_weight = array();     // urlid => weight accumulated so far
  $bonus = array();           // urlid => true once the carleton bonus was granted
  $weight = array();          // urlid => weight, only for pages containing all terms
  $urllist = array();

  foreach ($terms as $term_index => $w) {
    // Escape the term before embedding it in the SQL string literal.
    $query = "SELECT w2u.urlid as urlid, u.url as url, w2u.tag as tag FROM word_to_url w2u, word w, url u WHERE w.wid = w2u.wid AND w.word = \"".mysql_real_escape_string($w)."\" AND u.urlid=w2u.urlid;";
    $result = mysql_query( $query ) or die( "Query failed: ".mysql_error()."<br>".$query);

    while ($line = mysql_fetch_array($result, MYSQL_ASSOC)) {
      $urlid = $line['urlid'];
      $url = $line['url'];
      $tag = $line['tag'];

      if (!isset($temp_weight[$urlid])) {
        $temp_weight[$urlid] = 0;
      }

      // check off this word for this document
      $terms_in_doc[$urlid][$term_index] = true;
      // the document is complete once every term index has been checked off
      $complete_doc = (count($terms_in_doc[$urlid]) == $num_terms);

      if (substr_count($tag, 'title') > 0) {
        // check off this word for this title
        $terms_in_title[$urlid][$term_index] = true;
        $complete_title = (count($terms_in_title[$urlid]) == $num_terms);

        // extra bonus if all search terms are in the title
        if ($complete_title && $num_terms > 1) {
          $temp_weight[$urlid] += $title_bonus * 10;
        }
      }
      if (substr_count($tag, 'h1') > 0) {
        $temp_weight[$urlid] += $header_bonus;
      }
      if (substr_count($tag, 'b') > 0) {
        $temp_weight[$urlid] += $bold_bonus;
      }
      if (substr_count($tag, 'u') > 0) {
        $temp_weight[$urlid] += $underline_bonus;
      }
      if (substr_count($url, $w) > 0) {
        $temp_weight[$urlid] += $url_bonus;
      }
      // Domain bonus is granted at most once per URL and shrinks with path depth.
      if (empty($bonus[$urlid]) && preg_match("/https?:\/\/[^\/]*carleton.*/i", $url) > 0) {
        $bonus[$urlid] = true;
        $temp_weight[$urlid] += $carleton_bonus - 5 * (substr_count($url, '/') - 2);
      }
      // every occurrence counts for one point
      $temp_weight[$urlid]++;

      // if this doc has all the search terms in it, add it to the $weight hash
      if ($complete_doc) {
        $weight[$urlid] = $temp_weight[$urlid];
      }
    }
  }

  if ($weight) {
    arsort($weight);
    $weight = normalizeWeights($weight);
    foreach ($weight as $k => $v) {
      $urllist[] = array($k, $v);
    }
  }
  return $urllist;
}
?>
