﻿<?php
/*$desc = "Taille de l'Ecran (en g/m²) : 117";
 $retour = preg_replace("/[ (]en ([^):space:]+)[)]?[ :]{1,3}([0-9.]+)/i"," $2 $1",$desc);
 print_rr($retour);
 exit;*/

//include_once RECH_INCLUDE_PATH.'utils/phonex.class.php';
include_once RECH_INCLUDE_PATH.'utils/soundex2.php';
//include_once RECH_INCLUDE_PATH.'utils/double_metaphone.class.php';
include_once 'utils/PaiceHuskStemmer/PaiceHuskStemRules_fr.php';
include_once 'utils/PaiceHuskStemmer/PaiceHuskStemmer.php';

include_once RECH_INCLUDE_PATH.'utils/utils.inc.php';

/**
 * Computes the weight of a word from a base score.
 *
 * The result is the sum of two terms, rounded to 2 decimals:
 *   - $points damped exponentially by ($idx * $coef);
 *   - $points damped by exp(1 / $nb_mots).
 *
 * @param float|int $points  base score
 * @param float|int $coef    decay coefficient applied to $idx
 * @param int       $idx     value multiplied by $coef (callers in this file pass the word position)
 * @param int       $nb_mots divisor of the second term; must be non-zero
 * @return float
 */
function calcule_poids_mot($points,$coef,$idx,$nb_mots) {
	$terme_position = $points / exp($idx * $coef);
	$terme_nombre = $points / exp(1/$nb_mots);
	return round($terme_position + $terme_nombre,2);
}

/**
 * Computes the weight of a searched word from its position.
 *
 * @param int $position value multiplied by the COEF_RECHERCHE constant
 * @return float exp($position * COEF_RECHERCHE), rounded to 2 decimals
 */
function calcule_poids_mot_recherche($position) {
	$exposant = $position * COEF_RECHERCHE;
	return round(exp($exposant),2);
}
//$n=Rech_Parseur::str_to_singulier("les petits as du trou du maurrois sont centrois",$toto);
//echo $n;
//print_rr($toto);
//echo Rech_Parseur::recherche_to_lisible($n,$toto);
//exit;

/**
 * Static text parser used by the search engine.
 *
 * It normalises page contents (indexing side) and user queries (search side)
 * into lists of "complex words": arrays holding the word, its stem ("racine"),
 * its metaphone/phonex codes and, optionally, weight / count / position.
 *
 * All state is static; init_parseur() lazily loads the exception lists,
 * word replacements and expressions from the database.
 */
class Rech_Parseur {

	/** When true, the intermediate string of each parsing step is echoed (debug). */
	public static $affiche_phrases = false;
	/** When true, CStat timers are started and SQL statements are echoed (debug). */
	public static $use_stats = false;

	/** Space-joined index keys produced by the last parse_recherche() call. */
	public static $last_recherche_finale;
	/** Search string as it stood after the first normalise_mots() pass of parse_recherche(). */
	public static $last_recherche_effective;
	/** Human-readable form of the last search (see recherche_to_lisible()). */
	public static $last_recherche_lisible;

	private static $initialised = false;
	/** Words always accepted (loaded from the exception table, inclusion=1). */
	private static $exceptions_inclues;
	/** Words accepted for the current parse only; cleared at the start of each parse. */
	private static $exceptions_inclues_tmp;
	/** Words always rejected (loaded from the exception table, inclusion=0). */
	private static $exceptions_exclues;
	/** Rows {cherche, remplace_index, remplace_recherche, remplace_lisible}. */
	private static $expressions;
	private static $soundex;
	private static $phonex;

	/** Stem cache: dictionary name => (word => stem). */
	private static $index_racines = array();
	/**
	 * Replaced words, e.g. tv, télé, télévision => televiseur.
	 * Rows {cherche, remplace, mot_supplementaire}.
	 */
	private static $remplacements;

	/**
	 * Loads (once) the phonetic encoders and the database-backed lookup tables.
	 * The debug flag $affiche_phrases is refreshed on every call.
	 */
	public static function init_parseur() {
		self::$affiche_phrases=(isset($_REQUEST["affreq"]) && (EST_UBALDI===true));
		if(self::$initialised) return;

		self::$initialised = true;

		self::$index_racines = array();
		self::init_phonex();
		self::init_soundex2();

		self::$exceptions_inclues=mysql_fetch_column(ifReq("SELECT mot FROM ".RECH_BASE.RECH_TABLE_EXCEPTION." WHERE inclusion=1"),"mot");
		self::$exceptions_inclues_tmp=array();
		self::$exceptions_exclues=mysql_fetch_column(ifReq("SELECT mot FROM ".RECH_BASE.RECH_TABLE_EXCEPTION." WHERE inclusion=0"),"mot");
		self::$remplacements=mysql_fetch_all(ifReq("SELECT mc1.mot AS cherche, mc2.mot AS remplace,mc1.mot_supplementaire
				FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." mc1 ,".RECH_BASE.RECH_TABLE_MOT_CLE." mc2 
				WHERE mc1.id_mot_parent IS NOT NULL AND mc1.id_mot_parent = mc2.id_mot_cle"));
		self::$expressions=mysql_fetch_all(ifReq("SELECT cherche, remplace_index, remplace_recherche, remplace_lisible FROM ".RECH_BASE.RECH_TABLE_EXPRESSION));
	}

	/**
	 * Registers a word as accepted for the current parse only (no duplicates).
	 * @param string $mot raw word; it is normalised before being stored
	 */
	public static function ajoute_exception_inclue_tmp($mot) {
		self::init_parseur();
		$mot=self::normalise_chaine($mot);
		if(!in_array($mot,self::$exceptions_inclues_tmp)) self::$exceptions_inclues_tmp[]=$mot;
	}

	/** Clears the per-parse list of accepted words. */
	public static function reset_exception_inclue_tmp() {
		self::$exceptions_inclues_tmp=array();
	}

	/**
	 * Registers a word as always accepted (for the lifetime of the process).
	 * @param string $mot raw word; it is normalised before being stored
	 */
	public static function ajoute_exception_inclue($mot) {
		self::init_parseur();
		self::$exceptions_inclues[]=self::normalise_chaine($mot);
	}

	/** Instantiates the phonex encoder. */
	public static function init_phonex() {
		self::$phonex=new phonex();
	}

	/** Instantiates the soundex2 encoder. */
	public static function init_soundex2() {
		self::$soundex=new soundex2();
	}

	/**
	 * Parses a page content into weighted complex words.
	 *
	 * @param string $str         raw content
	 * @param mixed  $points      base score forwarded to decoupe_tableau()
	 * @param mixed  $coeff       decay coefficient forwarded to decoupe_tableau()
	 * @param string $no_decoupes pass "no_decoupes" to skip the splitting of references
	 * @param mixed  $fn_poids    forwarded to decoupe_tableau(), which currently ignores it
	 * @return array|false array("index"=>..., "mots"=>...) or false when nothing is left to index
	 */
	public static function parse_contenu_page($str,$points,$coeff,$no_decoupes = "",$fn_poids=null) {
		self::reset_exception_inclue_tmp();
		$str=trim($str);
		if(strlen($str)==0) return false;
		if(self::$use_stats) CStat::SDebutStat("self::init_parseur");
		self::init_parseur();
		// NOTE(review): the same timer is started a second time here, as in the
		// original code; confirm against CStat whether a stop call was intended.
		if(self::$use_stats) CStat::SDebutStat("self::init_parseur");

		if(RECHERCHE_DECOUPE_TOUTES_REFS === true && $no_decoupes != "no_decoupes") {
			// the reverse mapping is not needed when indexing
			$reverse = array();
			$str = self::remplace_references_texte_et_num($str,RECH_LNG_MIN_DECOUPE_REF_INDEX,$reverse);
		}

		if(self::$use_stats) CStat::SDebutStat("self::normalise_expressions");
		$str=self::normalise_expressions($str);
		if(self::$affiche_phrases) echo "<br>1 '".htmlentities($str)."'<br>";

		if(self::$use_stats) CStat::SDebutStat("self::normalise_chaine");
		$str=self::normalise_chaine($str);
		if(self::$affiche_phrases) echo "<br>2 '".htmlentities($str)."'<br>";

		$str=self::str_to_singulier($str);
		if(self::$affiche_phrases) echo "<br>2 '".htmlentities($str)."'<br>";

		if(self::$use_stats) CStat::SDebutStat("self::normalise_mots");
		$str=self::normalise_mots($str);
		if(self::$affiche_phrases) echo "<br>3 '".htmlentities($str)."'<br>";

		// second pass: word replacements may have produced new expressions, and vice versa
		if(self::$use_stats) CStat::SDebutStat("self::normalise_expressions");
		$str=self::normalise_expressions($str);
		if(self::$affiche_phrases) echo "<br>4 '".htmlentities($str)."'<br>";

		$str=self::normalise_mots($str);
		if(self::$affiche_phrases) echo "<br>5 '".htmlentities($str)."'<br>";

		if(self::$use_stats) CStat::SDebutStat("self::decoupe_tableau");

		$str=trim($str);
		if(strlen($str)==0) return false;

		$retour= self::decoupe_tableau($str,$points,$coeff,true,$fn_poids);
		if(self::$use_stats) CStat::SFinStat();

		if(self::$affiche_phrases){
			include_once RECH_INCLUDE_PATH.'utils/utils.inc.php';
			print_r_tableau_html_trie($retour["mots"],"poids");
		}
		return $retour;
	}

	/**
	 * Splits every token that mixes letters and digits (a product reference)
	 * into its parts, and records each substitution.
	 *
	 * @param string     $str
	 * @param int        $lng_min     minimum length forwarded to decoupe_chaine_ref()
	 * @param array|null $tab_reverse receives one {"avant","apres"} entry per split token
	 * @return string the string with split tokens
	 */
	public static function remplace_references_texte_et_num($str,$lng_min=1,&$tab_reverse=null) {
		include_once RECH_INCLUDE_PATH.'utils/str_utils.inc.php';
		if(!is_array($tab_reverse)) $tab_reverse = array();
		$split=explode(" ",$str);
		foreach($split as &$un_mot) {
			if(is_alpha_and_numeric_multiple($un_mot,RECH_MIN_CHANGEMENTS_REF)) {
				$reverse=array("avant"=>$un_mot);
				$decoupe=decoupe_chaine_ref($un_mot,$lng_min);
				$un_mot = implode(" ",$decoupe);

				$reverse["apres"]=$un_mot;
				$tab_reverse[]=$reverse;
			}
		}
		unset($un_mot); // drop the dangling foreach reference
		return implode(" ",$split);
	}

	/**
	 * Parses a user search string into weighted complex words and tries to
	 * attach a keyword id to each of them.
	 *
	 * Side effects: updates $last_recherche_effective, $last_recherche_lisible
	 * and $last_recherche_finale.
	 *
	 * @param string $str             raw search string
	 * @param mixed  $poids           base score forwarded to decoupe_tableau()
	 * @param mixed  $poids_decrement coefficient forwarded to decoupe_tableau()
	 * @return array|false array("index"=>..., "mots"=>...) or false when nothing is left to search
	 */
	public static function parse_recherche($str,$poids=10,$poids_decrement=1) {
		self::reset_exception_inclue_tmp();

		// Debug trace: restricted to the same audience as $affiche_phrases and
		// HTML-escaped, because $str is raw user input at this point.
		if(isset($_REQUEST["affreq"]) && (EST_UBALDI===true)) {
			echo "<br>parse_recherche ".htmlentities($str);
		}
		$tab_reverse=array();
		if(RECHERCHE_DECOUPE_TOUTES_REFS === true) {
			$str = self::remplace_references_texte_et_num($str,RECH_LNG_MIN_DECOUPE_REF_INDEX,$tab_reverse);
		}
		$str=trim($str);
		if(strlen($str)==0) return false;
		self::init_parseur();
		$str=self::limite_mots_recherche($str);
		$str=self::normalise_refs_recherche($str,$tab_reverse);
		$str=self::normalise_expressions($str,"recherche",$tab_reverse);
		$str=self::str_to_singulier($str,$tab_reverse);
		$str=self::normalise_chaine($str);

		if(self::$affiche_phrases) {
			echo "<br>------------";
			echo "<br>-1----------- $str";
			echo "<br>------------";
		}
		$str=self::normalise_exceptions($str);
		if(self::$affiche_phrases) {
			echo "<br>------------";
			echo "<br>-2a----------- $str";
			echo "<br>------------";
		}
		$str=self::normalise_mots_existants($str);
		if(self::$affiche_phrases) {
			echo "<br>------------";
			echo "<br>-2v----------- $str";
			echo "<br>------------";
		}
		$str=self::normalise_mots($str,MOT_SUPPL_RECHERCHE);
		self::$last_recherche_effective=$str;
		$str=self::normalise_expressions($str,"recherche",$tab_reverse);
		if(self::$affiche_phrases) {
			echo "<br>------------";
			echo "<br>-3----------- $str";
			echo "<br>------------";
		}
		$str=self::normalise_mots($str,MOT_SUPPL_RECHERCHE);
		if(self::$affiche_phrases) {
			echo "<br>------------";
			echo "<br>-4----------- $str";
			echo "<br>------------";
		}
		$str=trim($str);
		if(strlen($str)==0) return false;

		$str = str_replace("$","",$str);
		self::$last_recherche_lisible = self::recherche_to_lisible($str,$tab_reverse);
		$retour = self::decoupe_tableau($str,$poids,$poids_decrement);
		if(self::$affiche_phrases) {
			print_rr($retour);
		}

		// search-side weights depend on the word position only
		foreach($retour["mots"] as &$mot_recherche) {
			$mot_recherche["poids"] = calcule_poids_mot_recherche($mot_recherche["position"]+1);
		}
		unset($mot_recherche); // drop the dangling foreach reference

		self::trouve_mots($retour);
		self::$last_recherche_finale = implode(" ",$retour["index"]);
		return $retour;
	}

	/**
	 * Rebuilds a readable search string by undoing the recorded substitutions,
	 * most recent first.
	 *
	 * @param string $str         normalised search string (space separated)
	 * @param array  $tab_reverse list of {"avant","apres"} substitutions
	 * @return string
	 */
	public static function recherche_to_lisible($str,$tab_reverse) {
		// "@" is used as a word boundary marker so that only whole words are replaced
		$str="@".str_replace(" ","@",$str)."@";
		for($i=count($tab_reverse)-1;$i>=0;$i--) {
			if($tab_reverse[$i]["apres"] == $tab_reverse[$i]["avant"]) continue;
			$apres = str_replace(" ","@",$tab_reverse[$i]["apres"]);
			$str=str_ireplace("@".$apres."@","@".$tab_reverse[$i]["avant"]."@",$str);
		}
		return trim(str_replace("@"," ",$str));
	}

	/**
	 * To be applied before any word-level processing: lowercases, strips
	 * accents and replaces punctuation by spaces.
	 *
	 * @param string $str
	 * @return string
	 */
	public static function normalise_chaine($str) {
		return self::normalise_ponctuation(self::strip_accents(strtolower($str)));
	}

	/**
	 * Truncates a search string once RECH_MAX_NB_MOTS words longer than one
	 * character have been kept (shorter tokens are kept but not counted).
	 *
	 * Declared static because it is invoked as self::limite_mots_recherche().
	 *
	 * @param string $str
	 * @return string
	 */
	public static function limite_mots_recherche($str) {
		$split=explode(" ",$str);
		$retour="";
		$mots_restant=RECH_MAX_NB_MOTS;
		foreach($split as $mot) {
			if(strlen($mot)>1) {
				$mots_restant--;
			}
			$retour.=" ".$mot;
			if($mots_restant<=0) {
				break;
			}
		}
		return trim($retour);
	}

	/**
	 * Splits the reference-like tokens of a search string and registers every
	 * resulting part as a temporary accepted word.
	 *
	 * Declared static because it is invoked as self::normalise_refs_recherche().
	 *
	 * @param string $str
	 * @param array  $tab_reverse receives one {"avant","apres"} entry per split token
	 * @return string
	 */
	public static function normalise_refs_recherche($str,&$tab_reverse) {
		include_once RECH_INCLUDE_PATH.'utils/str_utils.inc.php';
		$split=explode(" ",$str);
		foreach($split as &$un_mot) {
			if(is_alpha_and_numeric_multiple($un_mot,1)) {
				$reverse=array("avant"=>$un_mot);
				$tab = decoupe_ref($un_mot,RECH_LNG_MIN_DECOUPE_REF_RECHERCHE,false);
				$reverse["apres"]=implode(" ",$tab);
				$tab_reverse[]=$reverse;
				foreach($tab as $mot) {
					self::ajoute_exception_inclue_tmp($mot);
				}
				$un_mot = $reverse["apres"];
			}
		}
		unset($un_mot); // drop the dangling foreach reference
		return implode(" ",$split);
	}

	/**
	 * Removes from a string every word rejected by mot_est_valide().
	 *
	 * @param string $str
	 * @return string
	 */
	public static function normalise_exceptions($str) {
		$retour=array();
		foreach(explode(" ",$str) as $mot) {
			if(self::mot_est_valide($mot)) $retour[]=$mot;
		}
		return implode(" ",$retour);
	}

	/**
	 * Returns the stem of a word, using a per-dictionary cache.
	 * Short words (< MIN_RACINISATION) and included exceptions are returned as is.
	 */
	private static function get_racine($str,$dictionnaire="fr") {
		if(strlen($str)<MIN_RACINISATION) return $str;
		if(self::est_exception_inclue($str)) return $str;

		// isset() first: the dictionary entry does not exist before its first stem is cached
		if(isset(self::$index_racines[$dictionnaire])
			&& is_array(self::$index_racines[$dictionnaire])
			&& array_key_exists($str,self::$index_racines[$dictionnaire])) {
			return self::$index_racines[$dictionnaire][$str];
		}

		$trouve = PaiceHuskStemmer($str,$dictionnaire);

		self::$index_racines[$dictionnaire][$str] = $trouve;
		return $trouve;
	}

	/** Returns the phonex code of a word. */
	private static function get_phonex($str) {
		if(!self::$phonex) self::init_phonex();
		self::$phonex->build($str);
		return self::$phonex->sString;
	}

	/**
	 * Returns the phonetic code of a word, using the algorithm selected by
	 * TYPE_METAPHONE; falls back on PHP's soundex().
	 */
	private static function get_metaphone($str) {
		if(TYPE_METAPHONE=="SOUNDEX") {
			return soundex($str);
		} else if(TYPE_METAPHONE=="SOUNDEX2") {
			if(!self::$soundex) self::init_soundex2();
			self::$soundex->build($str);
			return self::$soundex->sString;
		} else if(TYPE_METAPHONE=="DOUBLE_PRIMARY") {
			$double = new DoubleMetaPhone($str);
			return $double->primary;
		} else if(TYPE_METAPHONE=="DOUBLE_SECONDARY") {
			$double = new DoubleMetaPhone($str);
			return $double->secondary;
		}
		return soundex($str);
	}

	/**
	 * Removes the trailing "s" of every word that is at least
	 * MIN_LNG_SINGULIER long, contains no digit and is not an included exception.
	 *
	 * @param string $str
	 * @param array  $tab_reverse receives one {"avant","apres"} entry per changed word
	 * @return string
	 */
	public static function str_to_singulier($str,&$tab_reverse="vide") {
		if($tab_reverse==="vide") $tab_reverse=array();
		$split=explode(" ",$str);
		foreach($split as &$mot) {
			if(self::est_exception_inclue($mot)) continue;
			if(strlen($mot)>=MIN_LNG_SINGULIER && substr($mot,-1)=='s' && !preg_match("/([0-9]+)/",$mot)) {
				$apres=substr($mot,0,-1);
				$tab_reverse[]=array(
						"avant"=>$mot,
						"apres"=>$apres);
				$mot=$apres;
			}
		}
		unset($mot); // drop the dangling foreach reference
		return implode(" ",$split);
	}

	/**
	 * Builds the "complex word" of a word: {mot, racine, metaphone, phonex}
	 * plus the optional poids / nb / position entries when they are not NULL.
	 *
	 * @return array|null null for an empty word
	 */
	public static function mot_to_complexe($mot,$poids = NULL,$nb = NULL,$position = NULL) {
		if(strlen($mot)==0) return;

		if(self::$use_stats) CStat::SDebutStat("self::get_racine");
		$racine=self::get_racine($mot,"fr");
		if(self::$use_stats) CStat::SDebutStat("self::get_metaphone");
		$metaphone = self::get_metaphone($mot);
		if(self::$use_stats) CStat::SDebutStat("self::get_phonex");
		$phonex = self::get_phonex($mot);
		if(self::$use_stats) CStat::SFinStat();
		$nouveau_mot = array(
				"mot"=>$mot,
				"racine"=>$racine,
				"metaphone"=>$metaphone,
				"phonex"=>$phonex
		);
		if($poids !== NULL) $nouveau_mot["poids"]=$poids;
		if($nb !== NULL) $nouveau_mot["nb"]=$nb;
		if($position !== NULL) $nouveau_mot["position"]=$position;
		return $nouveau_mot;
	}

	/**
	 * Splits a normalised string into unique complex words and weights them.
	 *
	 * Words are deduplicated on the field named by Rech_Indexeur::$cle_unique_mot;
	 * the other spellings of a merged word are appended to "autres_mots".
	 *
	 * @param string $str
	 * @param mixed  $points      base score given to calcule_poids_mot()
	 * @param mixed  $coef        decay coefficient given to calcule_poids_mot()
	 * @param bool   $make_unique when true the occurrence count of every word is reset to 1
	 * @param mixed  $fn_poids    unused (the custom weight callback is disabled)
	 * @return array array("index"=>keys, "mots"=>complex words), or array() for an empty string
	 */
	public static function decoupe_tableau($str,$points,$coef,$make_unique = true,$fn_poids=null) {
		if(strlen(trim($str))==0) return array();
		$split=explode(" ",$str);
		$mots=array();
		$index=array();
		$cle_unique_mot= Rech_Indexeur::$cle_unique_mot;
		foreach($split as $idx=>$mot) {
			// empty tokens (consecutive spaces) have no complex word: skip them
			if(strlen($mot)==0) continue;
			// validate first: no need to compute stem / phonetics of a rejected word
			if(!self::mot_est_valide($mot)) continue;

			$nouveau_mot = self::mot_to_complexe($mot,0,1,$idx);
			$mot_cherche = $nouveau_mot[$cle_unique_mot];

			$pos=array_pos($index,$mot_cherche);
			if($pos>-1) {
				$mots[$pos]["nb"]++;
				if($nouveau_mot["mot"] != $mots[$pos]["mot"]) {
					if(!isset($mots[$pos]["autres_mots"])) $mots[$pos]["autres_mots"]="";
					$mots[$pos]["autres_mots"].="|".$nouveau_mot["mot"];
				}
			} else {
				array_push($index,$mot_cherche);
				array_push($mots,$nouveau_mot);
			}
		}
		foreach($mots as &$un_mot) {
			if($make_unique) $un_mot["nb"]=1;
			$un_mot["poids"] = calcule_poids_mot($points,$coef,$un_mot["position"],$un_mot["nb"]);
		}
		unset($un_mot); // drop the dangling foreach reference
		return array("index"=>$index,"mots"=>$mots);
	}

	/**
	 * Tries to attach an id_mot_cle to every complex word of $source["mots"].
	 *
	 * @param array $source array("index"=>..., "mots"=>...), updated in place
	 * @return bool true only when every word was matched exactly
	 */
	private static function trouve_mots(&$source) {
		$mots_justes = true;
		foreach($source["mots"] as &$mot_cmplx) {
			// trouve_mot() may return true, false or the string "non trouve":
			// only a strict true counts as an exact match. The call is placed
			// first so that every word is looked up.
			$mots_justes = (self::trouve_mot($mot_cmplx) === true) && $mots_justes;
		}
		unset($mot_cmplx); // drop the dangling foreach reference
		return $mots_justes;
	}

	/**
	 * Escapes a value for inclusion between quotes in a MySQL statement.
	 * Relies on the connection already opened for the mysql_* calls of this file.
	 */
	private static function echappe_sql($valeur) {
		return mysql_real_escape_string((string)$valeur);
	}

	/**
	 * Looks a complex word up in the keyword table (exact word, then stem /
	 * metaphone / phonex depending on the RECHERCHE_USE_*_DIRECT constants)
	 * and falls back on a Levenshtein search. On success $mot_cmplx is
	 * overwritten with the keyword found.
	 *
	 * @param array $mot_cmplx complex word, updated in place
	 * @param bool  $mot_reel  when true the word itself is returned instead of its parent keyword
	 * @return bool|string true for an exact match, false for an approximate one,
	 *                     the string "non trouve" when nothing matches
	 */
	public static function trouve_mot(&$mot_cmplx,$mot_reel=false) {
		$mot_exact = true;
		$mot_sql = self::echappe_sql($mot_cmplx["mot"]);
		$r1 = "";        // extra selected columns
		$r2 = "";        // extra OR conditions
		$r3 = "";        // extra ORDER BY keys
		$AND_POIDS = ""; // optional minimum weight filter
		if(RECHERCHE_USE_RACINE_DIRECT) {
			$racine_sql = self::echappe_sql($mot_cmplx["racine"]);
			$r1.=",(mc.racine='$racine_sql') AS meme_racine";
			$r2.=" OR mc.racine='$racine_sql'";
			$r3.=", meme_racine DESC";
		}
		if(RECHERCHE_USE_METAPHONE_DIRECT) {
			$metaphone_sql = self::echappe_sql($mot_cmplx["metaphone"]);
			$r1.=",(mc.metaphone='$metaphone_sql') AS meme_metaphone";
			$r2.=" OR (mc.metaphone='$metaphone_sql' AND metaphone!='".METAPHONE_NUL."')";
			$r3.=", meme_metaphone DESC";
		}
		if(RECHERCHE_USE_PHONEX_DIRECT) {
			$phonex_sql = self::echappe_sql($mot_cmplx["phonex"]);
			$r1.=",(mc.phonex='$phonex_sql') AS meme_phonex";
			$r2.=" OR (mc.phonex='$phonex_sql' AND phonex>0)";
			$r3.=", meme_phonex DESC";
		}
		// defined() expects the NAME of the constant, not its value
		if(defined('RECH_POIDS_MINI')) $AND_POIDS = "AND mc.poids_total>=".RECH_POIDS_MINI;
		// The OR alternatives are parenthesised so that the weight filter
		// applies to all of them (AND binds tighter than OR).
		if($mot_reel) {
			$req="SELECT
			mc.id_mot_cle,
			mc.mot,
			mc.racine,
			mc.metaphone,
			mc.phonex,
			(mc.mot='$mot_sql') AS meme_mot
			$r1
			FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." mc 
			WHERE (mc.mot='$mot_sql'$r2) $AND_POIDS 
			ORDER BY meme_mot DESC $r3";
		} else {
			$req="SELECT
			ifnull(mc.id_mot_parent,mc.id_mot_cle) as id_mot_cle,
			ifnull(mc2.mot,mc.mot) as mot,
			ifnull(mc2.racine,mc.racine) as racine,
			ifnull(mc2.metaphone,mc.metaphone) as metaphone,
			ifnull(mc2.phonex,mc.phonex) as phonex,
			(mc.mot='$mot_sql') AS meme_mot
			$r1
			FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." mc 
			LEFT JOIN ".RECH_BASE.RECH_TABLE_MOT_CLE." mc2 ON mc2.id_mot_cle = mc.id_mot_parent AND mc.mot_supplementaire=0
			WHERE (mc.mot='$mot_sql'$r2) $AND_POIDS 
			ORDER BY meme_mot DESC $r3
			";
		}
		if(self::$use_stats) echo "<br>$req<br>";
		if($res=testReq3($req)) {
			$mot_cmplx["id_mot_cle"] = $res["id_mot_cle"];
			$mot_exact = ($res["meme_mot"]==1);
		} else {
			$res=self::trouve_mot_levenshtein($mot_cmplx);
			// NOTE(review): the Levenshtein query exposes "ratio_final", not "ratio";
			// the key read here is kept as in the original — confirm which one is meant.
			$mot_cmplx["ratio"] = (is_array($res) && isset($res["ratio"])) ? $res["ratio"] : null;
			$mot_exact = false;
		}
		if($res) {
			if(self::$use_stats) print_rr($res);
			$mot_cmplx["mot"] = $res["mot"];
			$mot_cmplx["racine"] = $res["racine"];
			$mot_cmplx["metaphone"] = $res["metaphone"];
			$mot_cmplx["phonex"] = $res["phonex"];
			$mot_cmplx["id_mot_cle"] = $res["id_mot_cle"];
			return $mot_exact;
		}
		return "non trouve";
	}

	/**
	 * Finds the keyword closest to a word, scoring the candidates of similar
	 * length with calcule_ratio_levenshtein(). When MEMORISE_LEVENSHTEIN is
	 * true, results (including misses) are read from / stored in the
	 * Levenshtein cache table.
	 *
	 * @param array $mot_cmplx complex word
	 * @return array|false keyword row, or false when no candidate is good enough
	 */
	private static function trouve_mot_levenshtein($mot_cmplx) {
		include_once RECH_INCLUDE_PATH.'utils/str_utils.inc.php';
		$duree_validite = est_mot_que_lettres($mot_cmplx["mot"])?RECH_LEVEN_DUREE_VALIDITE:RECH_LEVEN_DUREE_VALIDITE_TEMPORAIRE;
		$mot_sql = self::echappe_sql($mot_cmplx["mot"]);
		// TODO:config
		if(MEMORISE_LEVENSHTEIN === true){
			$rq="SELECT mc.* FROM ".RECH_BASE.RECH_TABLE_LEVENSHTEIN." lev LEFT JOIN ".RECH_BASE.RECH_TABLE_MOT_CLE." mc ON lev.id_mot_cle = mc.id_mot_cle WHERE lev.mot='$mot_sql'";
			if(self::$use_stats) echo "<br>$rq<br>";
			if($res=testReq3($rq)) {
				ifReq("UPDATE ".RECH_BASE.RECH_TABLE_LEVENSHTEIN." SET nb=nb+1, date_validite=NOW()+ INTERVAL $duree_validite WHERE mot='$mot_sql'");
				return $res;
			}
		}

		$champ_levenshtein = CHAMP_LEVENSHTEIN;

		$lng=strlen($mot_cmplx[$champ_levenshtein]);

		$lng_min=floor($lng*(1-POURCENT_LONGUEUR_LEVENSHTEIN));
		$lng_max=ceil($lng*(1+POURCENT_LONGUEUR_LEVENSHTEIN));

		$nb=0;
		$timer = microtime(true);
		// must exist even when the query below fails
		$retour = array();

		// the left join adds the weight of the parent keyword
		// e.g. tv => its parent is televiseur
		$AND_POIDS = "";
		// defined() expects the NAME of the constant, not its value
		if(defined('RECH_POIDS_MINI')) $AND_POIDS = "AND mc.poids_total>=".RECH_POIDS_MINI;
		$requete = "SELECT mc.$champ_levenshtein, mc.poids_total+IFNULL(mc2.poids_total,0) AS poids_total , mc.id_mot_cle
				FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." mc LEFT JOIN ".RECH_BASE.RECH_TABLE_MOT_CLE." mc2 ON mc.id_mot_parent=mc2.id_mot_cle
				WHERE mc.id_mot_parent IS NULL AND mc.is_numeric=0 $AND_POIDS AND  CHAR_LENGTH(mc.mot)>=$lng_min AND CHAR_LENGTH(mc.mot)<=$lng_max";
		if(self::$use_stats) echo $requete;
		if($req=ifReq($requete)) {
			while($res = mysql_fetch_assoc($req)) {
				$ratio_final = self::calcule_ratio_levenshtein($mot_cmplx[$champ_levenshtein],$res[$champ_levenshtein],$res["poids_total"]);
				if($ratio_final>-1) {
					$retour[]=array("total"=>$ratio_final,"id_mot_cle"=>$res["id_mot_cle"]);
				}
				$nb++;
			}
			include_once RECH_INCLUDE_PATH.'utils/utils.inc.php';
			array_sortOn($retour,"total");
			if(self::$affiche_phrases) print_rr($retour);
		} else {
			echo mysql_error();
		}
		$timer2 = microtime(true);
		if(self::$use_stats) echo "$nb levenshtein calculés en ".($timer2-$timer);

		if(count($retour)==0) {
			// no consistent match found: remember the miss
			if(MEMORISE_LEVENSHTEIN === true){
				$rq_ins = "INSERT INTO ".RECH_BASE.RECH_TABLE_LEVENSHTEIN." SET mot='$mot_sql' , id_mot_cle = -1 , ratio = -1, date_validite=now() + INTERVAL $duree_validite,nb=1";
				if(self::$use_stats) echo "<br>$rq_ins<br>";
				ifReq($rq_ins);
			}
			return false;
		}

		// best candidate: first entry after array_sortOn()
		$le_ok = $retour[0];
		$req="SELECT *,$le_ok[total] as ratio_final FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." WHERE id_mot_cle = $le_ok[id_mot_cle]";
		if(self::$use_stats) echo $req;

		if(self::$use_stats) echo "<br>$req<br>";
		$res=testReq3($req);
		if(MEMORISE_LEVENSHTEIN === true && $res) {
			$rq_ins = "INSERT INTO ".RECH_BASE.RECH_TABLE_LEVENSHTEIN." SET mot='$mot_sql' , id_mot_cle = $res[id_mot_cle] , ratio = $res[ratio_final], date_validite=now() + INTERVAL $duree_validite,nb=1";
			if(self::$use_stats) echo "<br>$rq_ins<br>";
			ifReq($rq_ins);
		}
		return $res;
	}

	/**
	 * Scores how close two words are, weighted by the keyword weight.
	 *
	 * The Levenshtein ratio and the similar_text() percentage are averaged;
	 * when that average exceeds LEVENSHTEIN_RATIO_MINI the score is
	 * round(ratio * percentage * $poids_total / 100), otherwise -1.
	 *
	 * @return float|int score, or -1 when the words are too different (or both empty)
	 */
	public static function calcule_ratio_levenshtein($mot,$mot_cherche,$poids_total) {
		$lng_max = max(strlen($mot),strlen($mot_cherche));
		// two empty strings cannot be compared (would divide by zero)
		if($lng_max==0) return -1;

		$lev = levenshtein($mot,$mot_cherche);
		$ratio = round((1 - $lev / $lng_max) * 100);

		similar_text($mot,$mot_cherche,$pc);
		$pc = round($pc);
		$ratio2 = round(($ratio+$pc)/2);
		if($ratio2 > LEVENSHTEIN_RATIO_MINI) {
			return round($ratio *$pc* $poids_total/100);
		}
		return -1;
	}

	/**
	 * Merges a decoupe_tableau() result into another one: counts and weights
	 * of words present on both sides are added up, new words are appended.
	 *
	 * @param array|mixed $source target, updated in place (replaced by $rajout when not an array)
	 * @param array       $rajout array("index"=>..., "mots"=>...) to merge in
	 */
	public static function combine_tableau(&$source,$rajout) {
		if(!is_array($source)) {
			$source = $rajout;
			return;
		}
		$index = &$source["index"];
		foreach($rajout["index"] as $idx=>$mot) {
			$pos=array_pos($index,$mot);
			$nouveau = &$rajout["mots"][$idx];
			if($pos>-1) {
				$trouve = &$source["mots"][$pos];
				$trouve["nb"]+=$nouveau["nb"];
				$trouve["poids"]+=$nouveau["poids"];
				if($trouve["mot"] != $nouveau["mot"]) {
					if(!isset($trouve["autres_mots"])) $trouve["autres_mots"]="";
					$trouve["autres_mots"].="|".$nouveau["mot"];
				}
			} else {
				array_push($index,$mot);
				array_push($source["mots"],$nouveau);
			}
		}
	}


	/*
	 *
	 * Toolbox
	 *
	 */

	/**
	 * Replaces accented characters by their unaccented equivalent.
	 * NOTE(review): strtr() maps byte by byte, so this table is only correct
	 * when this file and the input use a single-byte encoding — confirm.
	 */
	public static function strip_accents($string) {
		return strtr($string,'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ','aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY');
	}

	/**
	 * Replaces punctuation characters by spaces, collapses runs of spaces and trims.
	 */
	public static function normalise_ponctuation($string) {
		// TODO!
		$retour= strtr($string,"-•;,.:!?’´'}®{[]||()+/\r\n\"&".chr(160),'                              ');
		// collapse double spaces
		$retour = preg_replace("/[ ]{2,}/", " ", $retour);

		return trim($retour);
	}

	/**
	 * Tells whether a string contains a space or one of the punctuation
	 * characters that separate words.
	 * @return bool
	 */
	public static function est_mot_compose($str) {
		return strpbrk($str,"-;,.:!?'’´}{[]||()+/\r\n\"& ")!==false;
	}

	/**
	 * Applies the expression table: every match of an expression's "cherche"
	 * pattern is replaced by its "remplace_$type" value. Temporary accepted
	 * words are protected during the replacement.
	 *
	 * @param string $string
	 * @param string $type        "index" or "recherche" (suffix of the replacement column)
	 * @param array  $tab_reverse receives one {"avant","apres"} entry per match,
	 *                            "avant" being the readable form
	 * @return string
	 */
	public static function normalise_expressions($string,$type="index",&$tab_reverse="vide") {
		if($tab_reverse==="vide") $tab_reverse=array();
		// spaces are doubled so that two consecutive words can both match " word "
		$string=str_replace(" ","  "," ".$string." ");
		$string=self::protege_exceptions($string);
		foreach(self::$expressions as $expression) {
			$remplace = $expression["remplace_$type"];
			$remplace_lisible = $expression["remplace_lisible"];
			$cherche=trim($expression["cherche"]);
			if(empty($cherche)) continue;
			$cherche='§ '.str_replace(" ","  ",$cherche).' §';
			$cherche=str_replace("[  ]?","[ ]*",$cherche);
			if(empty($remplace_lisible)) $remplace_lisible=$expression["remplace_recherche"];
			preg_match_all($cherche,$string,$matches);
			if(!empty($matches[0][0])) {
				foreach($matches[0] as $match) {
					$tab_reverse[]=array(
						"avant"=>(preg_replace($cherche,$remplace_lisible,$match)),
						"apres"=>(preg_replace($cherche,$remplace,$match)));
				}
			}
			$string=preg_replace($cherche,' '.$remplace.' ',$string);
		}
		$string=self::deprotege_exceptions($string);
		return trim(str_replace("  "," ",$string));
	}

	/**
	 * Replaces every temporary accepted word by a numbered placeholder.
	 */
	public static function protege_exceptions($string) {
		$remplace=array();
		$recherche=array();
		foreach(self::$exceptions_inclues_tmp as $i=>$exc){
			$remplace[$i]=" $$$i$$ ";
			$recherche[$i]=" $exc ";
		}
		return str_ireplace($recherche,$remplace,$string);
	}

	/**
	 * Inverse of protege_exceptions(): restores the words behind the placeholders.
	 */
	public static function deprotege_exceptions($string) {
		$remplace=array();
		$recherche=array();
		foreach(self::$exceptions_inclues_tmp as $i=>$exc){
			$remplace[$i]=" $$$i$$ ";
			$recherche[$i]=" $exc ";
		}
		return str_ireplace($remplace,$recherche,$string);
	}

	/**
	 * Replaces common words by the words used on the site,
	 * e.g. TV, télévision, télé => televiseur.
	 *
	 * Rows whose mot_supplementaire is 0 are plain replacements; rows whose
	 * mot_supplementaire equals $mot_supplementaire (> 0) keep the original
	 * word after the replacement; other rows are ignored.
	 *
	 * @param string $string
	 * @param mixed  $mot_supplementaire MOT_SUPPL_INDEX | MOT_SUPPL_RECHERCHE
	 * @return string
	 */
	public static function normalise_mots($string,$mot_supplementaire=MOT_SUPPL_INDEX) {
		$string=" ".$string." ";
		foreach(self::$remplacements as $expression) {
			if($expression["mot_supplementaire"] == $mot_supplementaire && $mot_supplementaire>0) {
				$remplace = $expression["remplace"]." ".$expression["cherche"];
			} else if($expression["mot_supplementaire"]==0) {
				$remplace = $expression["remplace"];
			} else {
				continue;
			}
			$string=str_replace(' '.$expression["cherche"].' ',' '.$remplace.' ',$string);
		}
		return trim($string);
	}

	/**
	 * Replaces every valid word by the keyword found for it by trouve_mot();
	 * words starting with "$", invalid words and unmatched words are kept as is.
	 *
	 * @param string $str
	 * @return string
	 */
	public static function normalise_mots_existants($str) {
		$retour=array();
		foreach(explode(" ",$str) as $mot) {
			// empty tokens, "$"-prefixed tokens and rejected words are passed through
			if(strlen($mot)==0 || $mot[0]=="$" || !self::mot_est_valide($mot)) {
				$retour[]=$mot;
				continue;
			}
			$cplx = self::mot_to_complexe($mot);
			if(self::trouve_mot($cplx) !== "non trouve") {
				$retour[]=$cplx["mot"];
			} else {
				$retour[]=$mot;
			}
		}
		if(self::$use_stats) print_rr($retour);
		return implode(" ",$retour);
	}

	/**
	 * Tells whether a word is kept by the parser: numbers always are, words of
	 * 1-2 characters only when listed as included exceptions, longer words
	 * unless listed as excluded exceptions. The empty string is never valid.
	 *
	 * @param string $mot
	 * @return bool
	 */
	public static function mot_est_valide($mot) {
		self::init_parseur();
		if(strlen($mot)==0) return false;
		if(preg_match("/^([0-9]+)$/",$mot)) return true;
		if(strlen($mot)<=2) {
			// short word
			if(in_array($mot,self::$exceptions_inclues)) return true;
			return in_array($mot,self::$exceptions_inclues_tmp);
		}
		return !in_array($mot,self::$exceptions_exclues);
	}

	/**
	 * Returns true when the word is one of the permanently included exceptions.
	 * @param string $mot
	 * @return bool
	 */
	private static function est_exception_inclue($mot) {
		return in_array($mot,self::$exceptions_inclues);
	}

	/**
	 * Returns the sizes of the internal tables.
	 * @return array
	 */
	public static function get_stats() {
		$retour=array();
		$retour["index_racines"]=count(self::$index_racines);
		$retour["exceptions_inclues"]=count(self::$exceptions_inclues);
		$retour["exceptions_exclues"]=count(self::$exceptions_exclues);
		$retour["expressions"]=count(self::$expressions);
		return $retour;
	}
}
?>