December 2017

1. Using the Python Elasticsearch client


#!/usr/bin/python
#coding:utf-8
import urllib2
import MySQLdb
import threading
import sys
import re
import os
from elasticsearch import Elasticsearch
reload(sys) 
sys.setdefaultencoding('utf-8')

#conf = ['192.168.0.107','root','pwd','chinacms4']
#db = MySQLdb.connect(conf[0],conf[1],conf[2],conf[3],charset="utf8")

def db_obj():
    conf = ['192.168.0.107','root','pwd','chinacms4']
    return MySQLdb.connect(conf[0],conf[1],conf[2],conf[3],charset="utf8")
es = Elasticsearch("192.168.0.107:9200")

# Add a document to the index via the elasticsearch Python client
def es_put(index_name,_id,type_name,data):
    global es
    try:
        es.index(index=index_name,doc_type=type_name,id=_id,body=data)
        print str(_id)+" add success!"
    except Exception,e:
        print repr(e)

# HTTP POST data to a URL (urllib2 issues a POST when a body is supplied)
# param string url
# param string data (JSON)

def http_put(url,data):
    request = urllib2.Request(url,data)
    request.add_header('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")
    try:
        request = urllib2.urlopen(request)
        rv = request.read()
        request.close()
        return rv
    except Exception,e:
        print "............................................................................................"
        print url
        print data
        print e
        print "............................................................................................"


def strip_tags(html):
    # Strip HTML tags
    dr = re.compile(r'<([^>]+)>',re.S)
    html = dr.sub('',html)
    html = re.sub(r'[\n]+',r'', html, flags=re.S)
    html = html.replace('\t','').replace('\n','').replace(' ','')
    # Drop invisible formatting characters (bidi marks, joiners, etc.).
    # Note: code points 102 ('f'), 110 ('n') and several accented Latin
    # letters are stripped as well -- this mangles Latin text, but the
    # corpus here is Chinese-only.
    for cp in (8206, 8207, 8205, 8204, 8234, 8237, 8238, 8236,
               8302, 8303, 8299, 8298, 8301, 8300, 30, 13,
               299, 230, 228, 102, 232, 233, 110, 229, 31):
        html = html.replace(unichr(cp).encode('utf-8'), '')
    # Remove any remaining ASCII control characters
    html = re.compile('[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f]').sub('',html)
    return html.strip()


def create_index(start,end):
    # One connection per thread instead of one per row
    db = db_obj()
    cur = db.cursor()
    for i in range(start,end):
        cur.execute("select itemid,content from cms_news_data where itemid = %s", (i,))
        result = cur.fetchone()
        if result != None:
            content = strip_tags(str(result[1]))
            # escape backslashes and double quotes so the hand-built JSON stays valid
            content = content.replace('\\','\\\\').replace('"','\\"')
            data = '{"content":"'+content+'"}'
            data = unicode(data).encode("utf-8")
            es_put("chinacms4",result[0],"content",data)
        else:
            print str(i)+" null"
    db.close()

# Spawn 30 worker threads, each indexing a 100k-id slice

for i in range(0,30):
    threading.Thread(target=create_index,args=(i*100000,(i+1)*100000)).start()



# Creating an index

# API endpoint for a test document
#api_url = "http://192.168.0.107:9200/test/user/9"
# Post test data
#print api_url
#data = '{"username":"海莉·贝内特","age":28}'
#result = http_put(api_url,data)
#print result
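Indexing one document per request is slow even with 30 threads. The client also ships a bulk helper that batches many documents into one request; a minimal sketch (assuming rows already fetched from cms_news_data and cleaned with strip_tags from the script above):

#!/usr/bin/python
#coding:utf-8
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch("192.168.0.107:9200")

def bulk_index(rows):
    # rows: (itemid, content) tuples, content already run through strip_tags()
    actions = [{
        "_index": "chinacms4",
        "_type": "content",
        "_id": row[0],
        "_source": {"content": row[1]},
    } for row in rows]
    # one HTTP request per batch instead of one per document
    helpers.bulk(es, actions)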
2. Using HTTP POST over the network

#!/usr/bin/python
#coding:utf-8
import urllib2
import MySQLdb
import threading
import sys
import re
import os
reload(sys) 
sys.setdefaultencoding('utf-8')

#conf = ['192.168.0.107','root','pwd','chinacms4']
#db = MySQLdb.connect(conf[0],conf[1],conf[2],conf[3],charset="utf8")

def db_obj():
    conf = ['192.168.0.107','root','pwd','chinacms4']
    return MySQLdb.connect(conf[0],conf[1],conf[2],conf[3],charset="utf8")

# HTTP POST data to a URL (urllib2 issues a POST when a body is supplied)
# param string url
# param string data (JSON)

def http_put(url,data):
    request = urllib2.Request(url,data)
    request.add_header('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")
    try:
        request = urllib2.urlopen(request)
        rv = request.read()
        request.close()
        return rv
    except Exception,e:
        print "............................................................................................"
        print url
        print data
        print e
        print "............................................................................................"


def strip_tags(html):
    # Strip HTML tags
    dr = re.compile(r'<([^>]+)>',re.S)
    html = dr.sub('',html)
    html = re.sub(r'[\n]+',r'', html, flags=re.S)
    return html.strip()


def create_index(start,end):
    # One connection per thread instead of one per row
    db = db_obj()
    cur = db.cursor()
    for i in range(start,end):
        cur.execute("select itemid,content from cms_news_data where itemid = %s", (i,))
        result = cur.fetchone()
        if result != None:
            api_url = "http://192.168.0.107:9200/chinacms4/data/"+str(result[0])
            content = strip_tags(str(result[1]))
            # escape backslashes and double quotes so the hand-built JSON stays valid
            content = content.replace('\\','\\\\').replace('"','\\"')
            data = '{"content":"'+content+'"}'
            print http_put(api_url,data)
        else:
            print str(i)+" null"
    db.close()

# Spawn 5 worker threads, each indexing a 500k-id slice

threading.Thread(target=create_index,args=(1,500000)).start()
threading.Thread(target=create_index,args=(500000,1000000)).start()
threading.Thread(target=create_index,args=(1000000,1500000)).start()
threading.Thread(target=create_index,args=(1500000,2000000)).start()
threading.Thread(target=create_index,args=(2500000,3000000)).start()


# Creating an index

# API endpoint for a test document
#api_url = "http://192.168.0.107:9200/test/user/9"
# Post test data
#print api_url
#data = '{"username":"海莉·贝内特","age":28}'
#result = http_put(api_url,data)
#print result

For various reasons, my first time using Elasticsearch was, of all places, on Windows. Below are the notes from that setup.

https://www.elastic.co/downloads/past-releases/elasticsearch-2-3-4

Unpack it onto the C: drive.

Install it as a Windows service (to remove the service later: sc delete <service name>).

C:\Windows\system32>cd c:\

c:\>cd elasticsearch-2.3.4\bin

c:\elasticsearch-2.3.4\bin>service.bat install
Installing service      :  "elasticsearch-service-x64"
Using JAVA_HOME (64-bit):  "C:\Program Files\Java\jdk-9.0.1"
The service 'elasticsearch-service-x64' has been installed.

c:\elasticsearch-2.3.4\bin>

The service list now contains elasticsearch-service-x64.

The service failed to start. Check the log under C:\elasticsearch-2.3.4\logs:

elasticsearch-service-x64-stderr.2017-12-19.log

Java HotSpot(TM) 64-Bit Server VM warning: Option UseParNewGC was deprecated in version 9.0 and will likely be removed in a future release.

JDK 9 has deprecated the CMS collector flags (UseConcMarkSweepGC / UseParNewGC) that Elasticsearch 2.x passes by default.

Switch to an older JDK. The dated versions here are mainly for the sake of elasticsearch-jdbc-master, a data-import tool whose releases haven't kept pace with ES, which forces the use of 2.3.4, last year's version.

jdk-8u151-windows-x64.exe

After installing it:

Set the system environment variable JAVA_HOME to C:\Program Files\Java\jdk1.8.0_151

The GC warning is gone, but the service still fails to start.

elasticsearch-service-x64.2017-12-19.log

[2017-12-19 16:17:36] [error] [ 4888] Failed creating java C:\Program Files\Java\jdk1.8.0_151\bin\server\jvm.dll
[2017-12-19 16:17:36] [error] [ 4888] The system cannot find the path specified.

Change the environment variable to C:\Program Files\Java\jre1.8.0_151 (the JRE is where bin\server\jvm.dll actually lives)

The service now starts.

http://localhost:9200/ is now reachable.

But it cannot be reached from other machines, so adjust the config in:

config\elasticsearch.yml

network.host: 0.0.0.0

http.port: 9200

Restart the service.

Now reachable from the LAN: http://192.168.0.107:9200/

Note that the firewall must allow the port through.
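On Windows, one way is netsh (a sketch; the rule name is arbitrary):

netsh advfirewall firewall add rule name="Elasticsearch 9200" dir=in action=allow protocol=TCP localport=9200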

Careful: with network.host set to 0.0.0.0, the node is also reachable from the internet if the machine has a public interface.
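If only LAN access is needed, a safer sketch is to bind just the LAN address instead of all interfaces:

network.host: 192.168.0.107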


Non-latest Elasticsearch releases ship a web UI, the elasticsearch-head plugin.

Running plugin install mobz/elasticsearch-head from the Elasticsearch bin directory downloads it automatically:

Microsoft Windows [Version 6.3.9600]
(c) 2013 Microsoft Corporation. All rights reserved.

C:\Windows\system32>cd c:\

c:\>cd elasticsearch-2.3.4

c:\elasticsearch-2.3.4>cd bin

c:\elasticsearch-2.3.4\bin>plugin install mobz/elasticsearch-head
-> Installing mobz/elasticsearch-head...
Trying https://github.com/mobz/elasticsearch-head/archive/master.zip ...
Downloading .................................................. DONE
Verifying https://github.com/mobz/elasticsearch-head/archive/master.zip checksums if available ...
NOTE: Unable to verify checksum for downloaded plugin (unable to find .sha1 or .md5 file to verify)
Installed head into c:\elasticsearch-2.3.4\plugins\head

c:\elasticsearch-2.3.4\bin>

http://192.168.0.107:9200/_plugin/head/ now shows the UI.


Create an index named test.

In the head UI's compound query ("复合查询") tab, send:

PUT
http://192.168.0.107:9200/test/

Add documents to the index:

PUT

http://192.168.0.107:9200/test/user/1/

{"username":"刘诗诗","age":18}

http://192.168.0.107:9200/test/user/2/

{"username":"张慧雯","age":22}

http://192.168.0.107:9200/test/user/3/

{"username":"夏晓薇","age":22}

http://192.168.0.107:9200/test/user/4/

{"username":"杨亚运","age":26}

A PUT to an existing id overwrites the stored document, i.e. performs an update:
http://192.168.0.107:9200/test/user/4/
{"username":"李绮雯","age":26}

POST works as well:
http://192.168.0.107:9200/test/user/5/
{"username":"高圆圆","age":28}


Installing the Python client makes index operations faster to script:

pip install elasticsearch
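A minimal sketch of the client against the test index above (the id and document values here are made up):

#!/usr/bin/python
#coding:utf-8
from elasticsearch import Elasticsearch

es = Elasticsearch("192.168.0.107:9200")
# index (or overwrite) one document, equivalent to the PUT examples above
es.index(index="test", doc_type="user", id=6, body={"username":"测试用户","age":20})
# read it back
print es.get(index="test", doc_type="user", id=6)["_source"]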

history logs the commands a user ran on Linux, but it records only the commands themselves.
Tweak it so the log also captures the time, IP, and user.

vim /etc/profile

Append:

USER_IP=`who -u am i 2>/dev/null| awk '{print $NF}'|sed -e 's/[()]//g'`
export HISTTIMEFORMAT="[%F %T][`whoami`][${USER_IP}] "

Reload it to take effect:

source /etc/profile

The result:

history

988 2017-12-07 13:23:22[192.168.0.2] cd /usr
989 2017-12-07 13:23:22[192.168.0.2] ls
990 2017-12-07 13:23:22[192.168.0.2] cd local
991 2017-12-07 13:23:22[192.168.0.2] ls
992 2017-12-07 13:23:22[192.168.0.2] cd coreseek
993 2017-12-07 13:23:22[192.168.0.2] ls

Key points:
1. A shell script that backs up databases automatically
2. Using FTP from within a shell script

#!/bin/bash
# Timestamped filename suffix, e.g. 2017_12_07_19_10_00.sql.gz
bak_date=`date +%Y_%m_%d_%H_%M_%S`.sql.gz
mysqldump -uroot -p123 db1 | gzip > /home/bak/db1$bak_date
mysqldump -uroot -p123 db2 | gzip > /home/bak/db2$bak_date
mysqldump -uroot -p123 db3 | gzip > /home/bak/db3$bak_date
# Inside lftp, "lftp -u ..." behaves like "open": it connects to the FTPS server
lftp <<EOF
lftp -u user,password ftps://192.168.0.6:99
put /home/bak/db1$bak_date
put /home/bak/db2$bak_date
put /home/bak/db3$bak_date
bye
EOF
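To restore one of these dumps later (a sketch with the same credentials; fill in the actual timestamp):

gunzip < /home/bak/db1<timestamp>.sql.gz | mysql -uroot -p123 db1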

3. Run it automatically from crontab

SHELL=/bin/sh
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/mysql/bin
10 19 * * * /home/shell/mysql_bak.sh

The backup script runs automatically once a day at 19:10.
On the environment-variable path question: cron jobs get a minimal PATH, and the script calls mysqldump, so /usr/local/mysql/bin must be added to PATH in the crontab.
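An alternative sketch that avoids editing PATH is to invoke the binary by absolute path inside the script itself (assuming the same install prefix):

/usr/local/mysql/bin/mysqldump -uroot -p123 db1 | gzip > /home/bak/db1$bak_date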