[这是本人工作中实现的coreseek/sphinx搜索,稍做了些修改,希望大家提出优化意见]
2.建立索引的php代码(sphinxindex.php)
3.运行代码,建立索引
$ret = $obj->query($_REQUEST);
$ret 就是搜索结果。
1.环境说明
安装目录:/var/coreseek/coreseek
配置文件:(./etc/sphinx.conf)
# searchd daemon settings
searchd
{
# port the PHP SphinxClient in this document connects to (127.0.0.1:9312)
port = 9312
log = /var/coreseek/coreseek/var/log/searchd.log
query_log = /var/coreseek/coreseek/var/log/query.log
read_timeout = 5
max_children = 30
pid_file = /var/coreseek/coreseek/var/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
# additional listener on localhost:9306 speaking the mysql41 protocol
listen = localhost:9306:mysql41
}
# data source definition
source ztm_sheet_xml_source
{
# The stream produced by sphinxindex.php is an xmlpipe2 document
# (<sphinx:docset> / <sphinx:schema> / <sphinx:document>), and the
# xmlpipe_field / xmlpipe_attr_* directives below apply to xmlpipe2 only,
# so the source type must be xmlpipe2 rather than the legacy xmlpipe.
type = xmlpipe2
# must point at the file written by sphinxindex.php
xmlpipe_command = cat /var/www/htdocs/myweb/application/data/sphinxforsheet.pipe.xml
# full-text fields
xmlpipe_field = sh_title
xmlpipe_field = sh_intro
# attributes (usable for filtering and sorting)
xmlpipe_attr_uint = sh_id
xmlpipe_attr_timestamp = sh_time
xmlpipe_attr_uint = sh_uid
xmlpipe_attr_uint = sh_subject
xmlpipe_attr_uint = sh_question_count
xmlpipe_attr_uint = sh_note_count
xmlpipe_attr_uint = sh_view_count
xmlpipe_attr_uint = sh_share_count
xmlpipe_attr_uint = sh_praise_count
xmlpipe_attr_uint = sh_download_count
xmlpipe_attr_uint = sh_admin_note
xmlpipe_attr_uint = sh_year
xmlpipe_attr_uint = sh_type
xmlpipe_attr_uint = sh_grade
xmlpipe_attr_uint = sh_city
}
# index definition
index ztm_sheet_index
{
# built from the ztm_sheet_xml_source source declared in this file
source = ztm_sheet_xml_source
path = /var/coreseek/coreseek/var/data/sheet
docinfo = extern
# presumably the mmseg dictionary directory used for Chinese word segmentation - confirm
charset_dictpath = /var/coreseek/mmseg3/etc
charset_type = zh_cn.utf-8
ngram_len = 0
}
2.建立索引的php代码(sphinxindex.php)
<?php
/**
 * Dumps rows of the MySQL `sheet` table into an xmlpipe2 document stream
 * (sphinxforsheet.pipe.xml) for the Sphinx/coreseek indexer.
 *
 * Entry point: script_sphinxindexforsheet::generateIndex().
 *
 * Any I/O or database failure raises a RuntimeException, so the script ends
 * with a non-zero exit status instead of leaving a silently truncated dump.
 */
class script_sphinxindexforsheet {
    /**
     * Output file. The path is relative, so the file is created in the current
     * working directory; the xmlpipe_command in sphinx.conf must read this file.
     */
    const XML_FILE = 'sphinxforsheet.pipe.xml';

    // NOTE(review): credentials are hard-coded (values unchanged from the
    // original script); move them to configuration outside version control.
    const DB_HOST = 'localhost';
    const DB_PORT = 3306;
    const DB_USER = 'root';
    const DB_PASSWORD = 'abc123';
    const DB_NAME = 'test';

    /** Full-text fields: xmlpipe2 field name => `sheet` column. */
    private static $fields = array(
        'sh_title' => 'title',
        'sh_intro' => 'intro',
    );

    /**
     * Attributes: xmlpipe2 attribute name => array(`sheet` column, schema type).
     * The schema header, the SQL column list and every document are generated
     * from this single table so they cannot drift apart.
     */
    private static $attrs = array(
        'sh_id'             => array('id', 'int'),
        'sh_uid'            => array('uid', 'int'),
        'sh_time'           => array('create_time', 'timestamp'),
        'sh_subject'        => array('subject', 'int'),
        'sh_question_count' => array('question_count', 'int'),
        'sh_note_count'     => array('note_count', 'int'),
        'sh_view_count'     => array('view_count', 'int'),
        'sh_share_count'    => array('share_count', 'int'),
        'sh_praise_count'   => array('praise_count', 'int'),
        'sh_download_count' => array('download_count', 'int'),
        'sh_admin_note'     => array('admin_note', 'int'),
        'sh_year'           => array('year', 'int'),
        'sh_type'           => array('type', 'int'),
        'sh_grade'          => array('grade', 'int'),
        'sh_city'           => array('city', 'int'),
    );

    /**
     * Creates (or truncates) the XML file and writes the docset/schema header.
     *
     * @throws RuntimeException when the file cannot be written
     */
    public static function init() {
        self::writeXmlPipe(self::getXmlPipeHead(), 'w');
    }

    /**
     * Writes the complete xmlpipe2 file: header, one document per `sheet` row
     * whose id lies in [$min_id, $max_id], then the footer.
     *
     * Rows are read in consecutive, non-overlapping id windows of $limit ids,
     * so no document id is emitted twice.
     *
     * @param int $min_id smallest id to export (ideally looked up from the table)
     * @param int $max_id largest id to export (ideally looked up from the table)
     * @param int $limit  number of ids per batch; values below 1 are treated as 1
     * @throws RuntimeException on file or database errors
     */
    public static function generateIndex($min_id = 1, $max_id = 100000, $limit = 3000) {
        self::init();
        $db = self::connectDb();
        foreach (self::batchRanges($min_id, $max_id, $limit) as $range) {
            $xml = self::getSheets($db, $range[0], $range[1]);
            if ($xml !== '') {
                self::appendXmlPipe($xml);
            }
            // Short pause between batches; usleep() takes microseconds (300 us).
            usleep(300);
        }
        $db->close();
        self::appendXmlPipe(self::getXmlPipeFooter());
    }

    /**
     * Appends $content to the XML file.
     *
     * @param string $content
     * @throws RuntimeException when the file cannot be written
     */
    protected static function appendXmlPipe($content){
        self::writeXmlPipe($content, 'a');
    }

    /**
     * @return string closing tag of the xmlpipe2 docset
     */
    protected static function getXmlPipeFooter(){
        return "</sphinx:docset>\n";
    }

    /**
     * Opens the XML file in the given fopen() mode, writes $content and closes
     * the handle. The handle is only closed when it was actually opened.
     *
     * @param string $content
     * @param string $mode 'w' to truncate, 'a' to append
     * @throws RuntimeException when the file cannot be opened or fully written
     */
    private static function writeXmlPipe($content, $mode) {
        $content = (string) $content;
        $handle = fopen(self::XML_FILE, $mode);
        if ($handle === false) {
            throw new RuntimeException('Cannot open ' . self::XML_FILE . ' for writing');
        }
        $written = fwrite($handle, $content);
        fclose($handle);
        if ($written === false || $written !== strlen($content)) {
            throw new RuntimeException('Failed to write ' . self::XML_FILE);
        }
    }

    /**
     * Splits [$min_id, $max_id] into half-open id windows [from, to).
     *
     * @param int $min_id
     * @param int $max_id
     * @param int $limit window width; values below 1 are treated as 1
     * @return array list of array(from, to) pairs; empty when $min_id > $max_id
     */
    private static function batchRanges($min_id, $max_id, $limit) {
        $limit = max(1, (int) $limit);
        $end = (int) $max_id + 1;
        $ranges = array();
        for ($from = (int) $min_id; $from < $end; $from += $limit) {
            $ranges[] = array($from, min($from + $limit, $end));
        }
        return $ranges;
    }

    /**
     * Opens one MySQL connection that is reused for every batch.
     *
     * Uses mysqli because the mysql_* extension no longer exists in PHP 7+.
     * NOTE(review): no connection charset is set (the original did not set one
     * either); the dump is declared as utf-8, so confirm the connection
     * delivers utf-8 text.
     *
     * @return mysqli
     * @throws RuntimeException when the connection fails
     */
    private static function connectDb() {
        // Report failures through return values on every PHP version
        // (PHP 8.1 switched the default to exceptions).
        mysqli_report(MYSQLI_REPORT_OFF);
        $db = new mysqli(self::DB_HOST, self::DB_USER, self::DB_PASSWORD, self::DB_NAME, self::DB_PORT);
        if ($db->connect_errno) {
            throw new RuntimeException('Could not connect: ' . $db->connect_error);
        }
        return $db;
    }

    /**
     * Column list for the SELECT, derived from the field/attribute tables.
     *
     * @return array list of column names without duplicates
     */
    private static function columns() {
        $columns = array_values(self::$fields);
        foreach (self::$attrs as $def) {
            $columns[] = $def[0];
        }
        return array_values(array_unique($columns));
    }

    /**
     * Reads the rows with $fromId <= id < $toId and renders them as xmlpipe2
     * documents.
     *
     * @param mysqli $db
     * @param int $fromId inclusive lower bound
     * @param int $toId   exclusive upper bound
     * @return string concatenated <sphinx:document> elements, '' when no rows match
     * @throws RuntimeException when the query fails
     */
    private static function getSheets($db, $fromId, $toId) {
        // Both bounds go through %d, so only integers reach the SQL text.
        $sql = sprintf(
            'select %s from sheet where id>=%d and id<%d',
            implode(',', self::columns()),
            $fromId,
            $toId
        );
        $result = $db->query($sql);
        if ($result === false) {
            throw new RuntimeException('Query failed: ' . $db->error);
        }
        $xml = '';
        while ($row = $result->fetch_assoc()) {
            $xml .= self::buildDocument($row);
        }
        $result->free();
        return $xml;
    }

    /**
     * Renders one `sheet` row as a <sphinx:document> element.
     *
     * Attribute values are cast to int so that NULL or malformed values cannot
     * produce invalid XML. NOTE(review): sh_time is declared as a timestamp
     * attribute - presumably create_time already holds a UNIX timestamp; confirm.
     *
     * @param array $row associative row containing every column of columns()
     * @return string
     */
    private static function buildDocument($row) {
        $xml = '<sphinx:document id="' . (int) $row['id'] . "\">\n";
        foreach (self::$fields as $name => $column) {
            $xml .= "<$name><![CDATA[" . self::escapeCdata($row[$column]) . "]]></$name>\n";
        }
        foreach (self::$attrs as $name => $def) {
            $xml .= "<$name>" . (int) $row[$def[0]] . "</$name>\n";
        }
        $xml .= "</sphinx:document>\n";
        return $xml;
    }

    /**
     * Makes arbitrary text safe inside a CDATA section: removes control
     * characters that XML 1.0 forbids and splits any "]]>" terminator so it
     * cannot end the section early.
     *
     * @param string|null $text
     * @return string
     */
    private static function escapeCdata($text) {
        $text = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', (string) $text);
        return str_replace(']]>', ']]]]><![CDATA[>', $text);
    }

    /**
     * @return string XML declaration, docset opening tag and schema block
     */
    private static function getXmlPipeHead(){
        $xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
        $xml .= "<sphinx:docset>\n";
        $xml .= "<sphinx:schema>\n";
        foreach (self::$fields as $name => $column) {
            $xml .= "<sphinx:field name=\"$name\"/>\n";
        }
        foreach (self::$attrs as $name => $def) {
            $xml .= "<sphinx:attr name=\"$name\" type=\"{$def[1]}\"/>\n";
        }
        $xml .= "</sphinx:schema>\n";
        return $xml;
    }
}
// Script entry point: build the xmlpipe2 dump when this file is executed.
script_sphinxindexforsheet::generateIndex();
?>
3.运行代码,建立索引
# Generate the xmlpipe2 dump in the foreground: the indexer must not start
# before the XML file is complete, which a background job plus a fixed
# `sleep 5` cannot guarantee.
# sphinxindex.php writes sphinxforsheet.pipe.xml into the current directory,
# so run it from the directory that xmlpipe_command in sphinx.conf reads from.
cd /var/www/htdocs/myweb/application/data || exit 1
php ../script/sphinxindex.php || exit 1
# index
cd /var/coreseek/coreseek || exit 1
./bin/indexer -c etc/sphinx.conf --rotate ztm_sheet_index
<?php
require_once '/var/www/htdocs/myweb/application/lib/sphinx/sphinxapi.php';
/**
 * Sheet search backed by the Sphinx index "ztm_sheet_index".
 *
 * query()   - reads the request parameters and returns the result page
 * search()  - runs the Sphinx query and adds paging totals
 * searchd() - builds and executes the SphinxClient query
 */
class application_biz_searchsheet {
    /** Attribute used for descending sort when the request does not choose one. */
    const DEFAULT_SORT_ATTR = 'sh_download_count';

    /** Page size used by query(). */
    const PAGE_SIZE = 20;

    /** Index attributes a request may sort by (the attributes declared in sphinx.conf). */
    private static $sortableAttrs = array(
        'sh_id', 'sh_time', 'sh_uid', 'sh_subject', 'sh_question_count',
        'sh_note_count', 'sh_view_count', 'sh_share_count', 'sh_praise_count',
        'sh_download_count', 'sh_admin_note', 'sh_year', 'sh_type', 'sh_grade',
        'sh_city',
    );

    /**
     * Runs a search described by request parameters.
     *
     * Recognised parameters: k (keyword), s (subject), ymin/ymax (year range,
     * both required), c (city), t (sheet types: array or comma-separated
     * list), g (grade), p (page, 1-based), desc (attribute for descending sort).
     *
     * @param array|null $request parameter array; defaults to $_REQUEST
     * @return array array("verify" => $_POST, "list" => result of search())
     */
    public function query($request = null) {
        if (!is_array($request)) {
            $request = $_REQUEST;
        }
        $verify = $_POST;
        $list = $this->search($this->parseRequest($request));
        return array(
            "verify" => $verify,
            "list" => $list,
        );
    }

    /**
     * Converts raw request parameters into the condition array of search().
     *
     * @param array $request
     * @return array condition with keys key, subject, year_min, year_max, city,
     *               arr_shtype, grade, page, pagenum, desc_order_by
     */
    protected function parseRequest($request) {
        // Year range: only applied when both bounds are supplied.
        $year_min = -1;
        $year_max = -1;
        if (isset($request['ymin']) && isset($request['ymax'])) {
            $year_min = intval(self::param($request, 'ymin', -1));
            $year_max = intval(self::param($request, 'ymax', -1));
        }

        // Sheet types: accept t[]=1&t[]=2 as well as t=1,2.
        $arr_shtype = null;
        if (isset($request['t'])) {
            $raw = is_array($request['t']) ? $request['t'] : explode(',', strval($request['t']));
            $types = array();
            foreach ($raw as $type) {
                if (is_scalar($type) && trim(strval($type)) !== '') {
                    $types[] = intval($type);
                }
            }
            if ($types) {
                $arr_shtype = array_values(array_unique($types));
            }
        }

        // Descending sort attribute. Defaults to the download count. An empty
        // value selects relevance ordering (searchd() treats '' as "no sort
        // attribute"); unknown attribute names fall back to the default
        // instead of being forwarded to Sphinx.
        $desc_order_by = self::DEFAULT_SORT_ATTR;
        if (isset($request['desc'])) {
            $requested = strval(self::param($request, 'desc', self::DEFAULT_SORT_ATTR));
            if ($requested === '' || in_array($requested, self::$sortableAttrs, true)) {
                $desc_order_by = $requested;
            }
        }

        return array(
            'key' => strval(self::param($request, 'k', '')),
            'subject' => strval(self::param($request, 's', '')),
            'year_min' => $year_min,
            'year_max' => $year_max,
            'city' => intval(self::param($request, 'c', 0)),
            'arr_shtype' => $arr_shtype,
            'grade' => intval(self::param($request, 'g', -1)),
            'page' => intval(self::param($request, 'p', 1)),
            'pagenum' => self::PAGE_SIZE,
            'desc_order_by' => $desc_order_by,
        );
    }

    /**
     * Returns a scalar request parameter, or $default when it is missing or
     * not a scalar (e.g. an array sent as k[]=...).
     */
    private static function param($request, $name, $default) {
        if (isset($request[$name]) && is_scalar($request[$name])) {
            return $request[$name];
        }
        return $default;
    }

    /**
     * Executes the search and assembles the result page.
     *
     * @param array $condition see searchd()
     * @param bool  $flag      unused; kept for backward compatibility
     * @return array empty array when the query failed, otherwise
     *               'sheet' (one entry per match, only present when there are
     *               matches) and 'other' (total, total_found, total_page)
     */
    public function search($condition, $flag = false) {
        $res = $this->searchd($condition);
        $data = array();
        if (!$res) {
            return $data;
        }
        if (isset($res['matches'])) {
            // Use the matches to load the detailed rows from the database.
            foreach ($res['matches'] as $row) {
                $data['sheet'][] = "查询数据库获得的结果";
            }
        }
        $total = isset($res['total']) ? intval($res['total']) : 0;
        $total_found = isset($res['total_found']) ? intval($res['total_found']) : 0;
        $pagenum = isset($condition['pagenum']) ? intval($condition['pagenum']) : 10;
        $data['other']['total'] = $total;
        $data['other']['total_found'] = $total_found;
        $data['other']['total_page'] = self::totalPages($total, $pagenum);
        return $data;
    }

    /**
     * Number of pages needed for $total results at $pagenum results per page.
     * A page size below 1 is treated as 1 to avoid a division by zero.
     *
     * @return int
     */
    private static function totalPages($total, $pagenum) {
        $pagenum = max(1, intval($pagenum));
        return (int) ceil(intval($total) / $pagenum);
    }

    /**
     * Builds and runs the Sphinx query.
     *
     * Condition keys (all optional): key, sid, year_min, year_max, city,
     * arr_shtype, grade, page (1-based), pagenum, desc_order_by.
     *
     * NOTE(review): the 'subject' key produced by query() is not used for
     * filtering; only a numeric 'sid' restricts sh_subject, and query() never
     * sets it. The mapping from subject to id is not visible here - confirm.
     * NOTE(review): the keyword is sent unescaped, so extended-syntax operators
     * typed by the user are interpreted by Sphinx; malformed syntax makes the
     * query fail and this method return false.
     *
     * @param array $condition
     * @return array|false result of SphinxClient::Query(), false on failure
     */
    public function searchd($condition) {
        // Keyword.
        $key = isset($condition['key']) ? strval($condition['key']) : '';
        // Subject id.
        $sid = isset($condition['sid']) ? intval($condition['sid']) : 0;
        // Year bounds.
        $year_min = isset($condition['year_min']) ? intval($condition['year_min']) : -1;
        $year_max = isset($condition['year_max']) ? intval($condition['year_max']) : -1;
        // City.
        $city = isset($condition['city']) ? intval($condition['city']) : 0;
        // Sheet types.
        $arr_shtype = isset($condition['arr_shtype']) ? $condition['arr_shtype'] : null;
        // Grade.
        $grade = isset($condition['grade']) ? intval($condition['grade']) : -1;
        // Page number, starting at 1.
        $page = isset($condition['page']) ? intval($condition['page']) : 1;
        // Results per page; SetLimits() requires a positive limit.
        $pagenum = isset($condition['pagenum']) ? intval($condition['pagenum']) : 10;
        $pagenum = max(1, $pagenum);
        // Attribute used for descending sort.
        $desc_order_by = isset($condition['desc_order_by']) ? $condition['desc_order_by'] : null;

        try {
            $client = $this->connect();
            if (!$client) {
                return false;
            }
            $client->SetMatchMode(SPH_MATCH_EXTENDED2);
            $client->SetRankingMode(SPH_RANK_WORDCOUNT);
            if ($desc_order_by) {
                $client->SetSortMode(SPH_SORT_ATTR_DESC, $desc_order_by);
            } else {
                $client->SetSortMode(SPH_SORT_RELEVANCE);
            }
            if ($sid > 0) {
                $client->SetFilter('sh_subject', array($sid));
            }
            // Year range; SetFilterRange() requires min <= max.
            if ($year_min > -1 && $year_max > -1) {
                if ($year_min > $year_max) {
                    $swap = $year_min;
                    $year_min = $year_max;
                    $year_max = $swap;
                }
                $client->SetFilterRange('sh_year', $year_min, $year_max);
            }
            if ($city > 0) {
                $client->SetFilter('sh_city', array($city));
            }
            // Sheet types; SetFilter() expects a non-empty list of integers.
            if ($arr_shtype) {
                $client->SetFilter('sh_type', array_values(array_map('intval', (array) $arr_shtype)));
            }
            if ($grade > -1) {
                $client->SetFilter('sh_grade', array($grade));
            }
            // Paging.
            $page = ($page < 1) ? 1 : $page;
            $offset = ($page - 1) * $pagenum;
            $client->SetLimits($offset, $pagenum);

            $res = $client->Query($key, "ztm_sheet_index");
            if ($res === false) {
                // Keep the "false on failure" contract but leave a trace.
                error_log('Sphinx query failed: ' . $client->GetLastError());
            }
            return $res;
        } catch (Exception $e) {
            error_log('Sphinx query failed: ' . $e->getMessage());
            return false;
        }
    }

    /**
     * Creates a SphinxClient pointed at the local searchd (127.0.0.1:9312).
     *
     * @return SphinxClient|false false when the client could not be created
     */
    protected function connect(){
        try {
            $host = '127.0.0.1';
            $port = 9312;
            $client = new SphinxClient();
            $client->SetServer($host, intval($port));
            return $client;
        } catch (Exception $e) {
            return false;
        }
    }
}
?>
$obj = new application_biz_searchsheet();
$ret = $obj->query($_REQUEST);
$ret 就是搜索结果:一个数组,其中 "verify" 为原样返回的 $_POST,"list" 为 search() 返回的结果列表及分页信息。
来源:oschina
链接:https://my.oschina.net/u/815299/blog/114207