2017年11月

coreseek配置文件sphinx.conf

#
# Minimal Sphinx configuration sample (clean, simple, functional)
#

# Main source: door/window articles read from MySQL table dt_article_21
source chinamenwang
{
    type          = mysql

    sql_host      = 192.168.1.2
    sql_user      = user
    sql_pass      = pwd
    sql_db        = dbname
    sql_port      = 3306  # optional, default is 3306

    # Store the current max(itemid) in dt_sphinx under moduleid 1; the main
    # query below only fetches rows up to that stored value.
    sql_query_pre = replace into dt_sphinx select 1,max(itemid) from dt_article_21
    sql_query     = \
        SELECT itemid,title,username from dt_article_21 where itemid <=(select maxid from dt_sphinx where moduleid = 1)
    sql_query_pre = SET NAMES utf8
}
# Delta source: inherits chinamenwang and fetches only the rows whose itemid
# is greater than the maxid stored in dt_sphinx for moduleid 1
source delta_chinamenwang : chinamenwang
{
    sql_query_pre = SET NAMES utf8
    sql_query     = SELECT itemid,title,username from dt_article_21 where itemid>( select maxid from dt_sphinx where moduleid = 1 )
}

# Main index built from the chinamenwang source
index chinamenwang
{
    source           = chinamenwang
    path             = /usr/local/coreseek/var/data/chinamenwang
    docinfo          = extern
    stopwords        = /usr/local/coreseek/var/data/stopwords.txt

    # Dictionary directory and charset used for zh_cn text
    charset_dictpath = /usr/local/mmseg3/etc/
    ngram_len        = 0
    charset_type     = zh_cn.utf-8
}
# Delta index: inherits chinamenwang, but reads the delta source and writes
# to its own path
index delta_chinamenwang : chinamenwang
{
    source           = delta_chinamenwang
    path             = /usr/local/coreseek/var/data/delta_chinamenwang
    docinfo          = extern
    stopwords        = /usr/local/coreseek/var/data/stopwords.txt

    # Dictionary directory and charset used for zh_cn text
    charset_dictpath = /usr/local/mmseg3/etc/
    ngram_len        = 0
    charset_type     = zh_cn.utf-8
}




# Settings for the indexer tool
indexer
{
    mem_limit = 128M
}


# Settings for the searchd daemon: two listeners (9312 and 9306 with the
# mysql41 protocol); log, query log and pid file all live under
# /usr/local/coreseek/var/log
searchd
{
    listen          = 9312
    listen          = 9306:mysql41

    log             = /usr/local/coreseek/var/log/searchd.log
    query_log       = /usr/local/coreseek/var/log/query.log
    pid_file        = /usr/local/coreseek/var/log/searchd.pid

    read_timeout    = 5
    max_children    = 30
    max_matches     = 1000

    seamless_rotate = 1
    preopen_indexes = 1
    unlink_old      = 1
    workers         = threads # for RT to work
}

shell重建索引脚本

#!/bin/sh
# Full rebuild: stop searchd, rebuild every index, then start searchd again.
# The stdout of each step is appended to its own log file under /shell.
#
# The timestamp is taken once and contains no spaces. The previous unquoted
# `date "+%Y-%m-%d %H:%M:%S"` expanded to two words inside the redirection
# target (bash: "ambiguous redirect"), and calling date three times gave the
# three log files of a single run different names.
stamp=$(date "+%Y-%m-%d_%H-%M-%S")
conf=/usr/local/coreseek/etc/sphinx.conf

/usr/local/coreseek/bin/searchd --stop -c "$conf" >> "/shell/${stamp}_stop.log"
sleep 2
/usr/local/coreseek/bin/indexer -c "$conf" --all >> "/shell/${stamp}_index.log"
sleep 2
/usr/local/coreseek/bin/searchd -c "$conf" >> "/shell/${stamp}_start.log"

shell增量索引脚本

#!/bin/sh
# Incremental indexing: rebuild the delta index, merge it into the main
# index, then append a timestamp line. All output goes to one log file.
indexer=/usr/local/coreseek/bin/indexer
conf=/usr/local/coreseek/etc/sphinx.conf
log=/shell/sp_chinamenwang_zengliang.log

"$indexer" -c "$conf" delta_chinamenwang --rotate >> "$log"
sleep 1
"$indexer" -c "$conf" --merge chinamenwang delta_chinamenwang --rotate --merge-dst-range deleted 0 0 >> "$log"
date "+%Y-%m-%d %H:%M:%S" >> "$log"

crontab 定时任务增量

# Run the incremental indexing script every minute.
# NOTE(review): the script name uses a dot (sp_chinamenwang.zengliang.sh)
# while its log file uses an underscore (sp_chinamenwang_zengliang.log) -
# confirm this path matches the real file name.
*/1 * * * * /bin/sh /shell/sp_chinamenwang.zengliang.sh

增量涉及的数据表

-- Counter table used by the sphinx.conf sources above: one row per module,
-- holding the highest itemid covered by the main index.
-- Table and column names match the queries in the config
-- (`replace into dt_sphinx select 1,max(itemid) ...` and
-- `select maxid from dt_sphinx where moduleid = 1`).
-- REPLACE relies on the primary key to overwrite the row of a module.
CREATE TABLE `dt_sphinx` (
  `moduleid` int(4) NOT NULL,
  `maxid` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`moduleid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

要定期重建索引,比如每天0点。
增量根据业务场景,最快可每分钟增量一次。
增量索引不会更新已经索引过的历史数据,即:如果在真实表上修改了已索引记录的标题,必须通过重建索引才能使修改生效。

先安装jdk ,
http://www.oracle.com/technetwork/java/javase/downloads/jdk9-downloads-3848520.html
20180122在blog常用软件加了jdk9.0.4的下载文件
https://blog.pucipuci.cn/file/jdk-9.0.4_linux-x64_bin.tar.gz

# Download and unpack JDK 9.0.1, then copy it to /usr/local.
# The URL is quoted because it contains "?" (a glob character), and -O saves
# the tarball under a clean name instead of one ending in "?AuthParam=...".
wget -O jdk-9.0.1_linux-x64_bin.tar.gz "http://download.oracle.com/otn-pub/java/jdk/9.0.1+11/jdk-9.0.1_linux-x64_bin.tar.gz?AuthParam=1511763152_88b88232c3dbf65f786070b6e8833a6c"
tar -zxvf jdk-9.0.1_linux-x64_bin.tar.gz
cp -r jdk-9.0.1 /usr/local/


vi /etc/profile
# Append the lines below. JAVA_HOME must point at the directory that was
# actually copied to /usr/local (jdk-9.0.1, the version `java -version` reports).
export JAVA_HOME=/usr/local/jdk-9.0.1
export PATH=$JAVA_HOME/bin:$PATH
# NOTE(review): JDK 9 no longer ships lib/dt.jar or lib/tools.jar - confirm
# whether this CLASSPATH entry is still needed.
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
source /etc/profile

[root@HQ tmp]# java -version
java version "9.0.1"
Java(TM) SE Runtime Environment (build 9.0.1+11)
Java HotSpot(TM) 64-Bit Server VM (build 9.0.1+11, mixed mode)


# Download Mycat 1.4, unpack it and move it to /usr/local/mycat.
wget https://raw.githubusercontent.com/MyCATApache/Mycat-download/master/1.4-RELEASE/Mycat-server-1.4-release-20151019230038-linux.tar.gz
tar -zxvf Mycat-server-1.4-release-20151019230038-linux.tar.gz
mv mycat/ /usr/local/

# Create the mycat group and a no-login user (run as root). These must run
# before the chown below, which fails if the user does not exist.
groupadd mycat
useradd -s /sbin/nologin -g mycat -M mycat
passwd mycat

# "user:group" is the standard separator; the "user.group" form is deprecated.
chown -R mycat:mycat /usr/local/mycat/
cd /usr/local/mycat/bin
./mycat start

ps -ef | grep mycat

[root@HQ bin]# ps -ef | grep mycat
root       3700      1  0 11:00 ?        00:00:00 /usr/local/mycat/bin/./wrapper-linux-x86-64 /usr/local/mycat/conf/wrapper.conf wrapper.syslog.ident=mycat wrapper.pidfile=/usr/local/mycat/logs/mycat.pid wrapper.daemonize=TRUE wrapper.lockfile=/var/lock/subsys/mycat
root       3726   3297  0 11:00 pts/1    00:00:00 grep mycat

netstat -anp | grep 8066

有端口监听

# Connect through TCP with 127.0.0.1. With -hlocalhost the mysql client uses
# the local Unix socket and ignores -P, so it would reach a local mysqld
# instead of the service listening on port 8066.
mysql -uroot -p -P8066 -h127.0.0.1

成功!!

全局还是不变

#!/usr/bin/python
#coding:utf-8
import requests
import re
import os
import urllib
import time
import random
import MySQLdb
import sys

内容获取细节

html = requests.get(url,headers=headers)        
        if html != None:
            print html.content.decode('gb2312')

在实际运行中,抓取列表成功了,但在抓取正文失败
html.content.decode('gb2312')报
'ascii' codec can't decode byte 0xe4 in position 0: ordinal not in range(128)

继续处理如下

reload(sys) 
sys.setdefaultencoding('utf-8')

mysql> alter table news add create_time datetime null;
ERROR 1878 (HY000): Temporary file write failure.

原因:MySQL 默认的 tmp 目录空间不足

处理方法:

[root@s110732 ~]# vim /etc/my.cnf #把tmpdir设置到 /data/tmp
[root@s110732 ~]# mkdir -p /data/tmp
[root@s110732 ~]# service mysqld restart
Shutting down MySQL..... SUCCESS! 
Starting MySQL. ERROR! The server quit without updating PID file 
[root@s110732 ~]# chown -R mysql:mysql /data/tmp
[root@s110732 ~]# service mysqld restart
 ERROR! MySQL server PID file could not be found!
Starting MySQL.. SUCCESS! 

# Build and install keepalived 1.2.2 from source, then register it as a service.

# Build dependencies. popt-devel is installed up front because ./configure
# otherwise aborts with "configure: error: Popt libraries is required".
yum install -y openssl-devel popt-devel
cd /usr/tmp
wget http://www.keepalived.org/software/keepalived-1.2.2.tar.gz
tar xvzf keepalived-1.2.2.tar.gz
cd keepalived-1.2.2
#./configure --disable-fwmark --prefix=/usr/local/keepalived
./configure --prefix=/usr/local/keepalived

make && make install

# Copy the init script, sysconfig file, sample config and binary from the
# install prefix to the system locations.
cp /usr/local/keepalived/etc/rc.d/init.d/keepalived /etc/init.d/
cp /usr/local/keepalived/etc/sysconfig/keepalived /etc/sysconfig/
mkdir -p /etc/keepalived/
cp /usr/local/keepalived/etc/keepalived/keepalived.conf /etc/keepalived/
cp /usr/local/keepalived/sbin/keepalived /usr/sbin/

# Register the service and enable it at boot.
chmod +x /etc/init.d/keepalived
chkconfig --add keepalived
chkconfig keepalived on

# Keep the sample config as a backup, then write a new one.
mv /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.bak
vim /etc/keepalived/keepalived.conf