Python基于多种方式检测linux服务运行状态(进程筛选、systemctl、API)

 2024/11/19 

基于systemctl检查nginx服务状态

import subprocess

# nginx配置检查
def check_nginx_config():
    """Validate the nginx configuration by running ``nginx -t``.

    Prints a success message when the check passes; otherwise prints the
    captured stderr of the command, or the reason it could not be launched.

    Returns:
        bool: True if ``nginx -t`` exited with status 0, False otherwise.
    """
    try:
        subprocess.run(['nginx', '-t'], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # stderr was captured above, so it holds the command's diagnostics.
        print(e.stderr)
        return False
    except OSError as e:
        # Raised when the nginx binary is missing or cannot be executed.
        print(f"Failed to run nginx -t: {e}")
        return False
    print("Checking nginx configuration successfully")
    return True

# 检测nginx是否运行，返回一个值，让重启函数判断
def check_nginx_process():
    """Report whether the nginx systemd unit is currently active.

    Returns:
        bool: True only when ``systemctl is-active nginx`` prints ``active``;
        False for any other state or when systemctl cannot be executed.
    """
    try:
        # Deliberately no check=True: a non-zero exit status here is the
        # ordinary answer for a unit that is not active, not a failure of
        # the status query itself. The printed state decides the result.
        result = subprocess.run(['systemctl', 'is-active', 'nginx'],
                                capture_output=True, text=True)
    except OSError as e:
        # systemctl is missing or cannot be executed on this host.
        print(f"nginx status failed: {e}")
        return False
    return result.stdout.strip() == 'active'

# 如果nginx没有运行，进行重启的重启函数
def restart_nginx():
    """Restart nginx with ``systemctl restart nginx`` and report the outcome.

    Prints a confirmation on success. On failure prints a failure message
    followed by the captured stderr of systemctl, or the OS error if the
    command could not be launched. Returns None in every case.
    """
    try:
        # Capture the output: without it CalledProcessError.stderr is None
        # and the failure branch would have nothing useful to print.
        subprocess.run(['systemctl', 'restart', 'nginx'],
                       capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Failed to restart nginx")
        print(e.stderr)
    except OSError as e:
        # systemctl is missing or cannot be executed on this host.
        print("Failed to restart nginx")
        print(e)
    else:
        print("nginx has been restarted")

if __name__ == '__main__':
    # Validate the configuration first, then make sure the service is up.
    check_nginx_config()
    nginx_active = check_nginx_process()
    if nginx_active:
        print('nginx is already running')
    else:
        restart_nginx()

基于grep筛选监控tomcat服务状态

import subprocess
import time

def is_tomcat_running():
    """Check the process list for a running tomcat process.

    Runs ``ps aux`` and filters for lines mentioning "tomcat", excluding the
    grep helpers themselves and any python process.

    Returns:
        bool: True if at least one matching process line was found,
        False if none was found or the check itself failed.
    """
    try:
        # No check=True: the pipeline's status is that of the last grep,
        # which exits 1 when it selects no lines. That is the normal
        # "not running" answer and must not be treated as an error.
        result = subprocess.run("ps aux | grep tomcat | grep -v grep | grep -v python",
                                shell=True, text=True, capture_output=True)
    except OSError as e:
        print(f"tomcat status error:{e}")
        return False

    # Any status other than 0 (match) or 1 (no match) is a real failure.
    if result.returncode not in (0, 1):
        print("tomcat status error")
        return False

    if result.stdout.strip():
        print("tomcat is running")
        return True

    print("tomcat is not running")
    return False

def start_tomcat(catalina_script="/usr/bin/tomcat/bin/catalina.sh"):
    """Start tomcat by running ``<catalina_script> start``.

    Args:
        catalina_script: Path to tomcat's ``catalina.sh`` control script.
            Defaults to the path this monitor was written for.

    Returns:
        bool: True if the start command exited with status 0, False if it
        exited non-zero or could not be executed.
    """
    try:
        # catalina.sh needs an explicit action argument; "start" is the one
        # that launches the server. An argument list with no shell avoids
        # quoting issues if the path contains spaces.
        subprocess.run([catalina_script, "start"], check=True, text=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError: script missing or not executable.
        # CalledProcessError: script ran but reported failure.
        print(f"tomcat started failed:{e}")
        return False
    print("tomcat started successfully")
    return True

def monitor_tomcat(interval):
    """Poll tomcat forever and try to start it whenever it is not running.

    Args:
        interval: Seconds to wait between two consecutive checks.

    This function never returns; stop it by terminating the process.
    """
    while True:
        if not is_tomcat_running():
            print("tomcat failed,try to start tomcat")
            start_tomcat()
        else:
            print("tomcat is running")
        # Sleep on every pass, not only when tomcat is healthy. Otherwise a
        # start attempt that keeps failing is retried in a tight loop with
        # no pause between attempts.
        time.sleep(interval)

if __name__ == '__main__':
    # Script entry point: check tomcat every 5 seconds.
    monitor_tomcat(interval=5)

基于API检测es服务状态

es安装

yum install java-11-openjdk-devel -y
 
java --version
#openjdk 11.0.25 2024-10-15 LTS
#OpenJDK Runtime Environment (Red_Hat-11.0.25.0.9-1) (build 11.0.25+9-LTS)
#OpenJDK 64-Bit Server VM (Red_Hat-11.0.25.0.9-1) (build 11.0.25+9-LTS, mixed mode, sharing)

如果安装了多个java环境，可以通过alternatives --config java来手动选择系统默认环境

cat > /etc/yum.repos.d/elasticsearch.repo <<EOF
[elasticsearch-8.x] 
name=Elasticsearch repository for 8.x packages 
baseurl=https://artifacts.elastic.co/packages/8.x/yum 
gpgcheck=1 
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch 
enabled=1 
autorefresh=1
type=rpm-md
EOF

yum install elasticsearch -y

速度很慢，我下载了半个小时才下载完

修改es配置文件

# 修改绑定地址为0.0.0.0（监听所有网卡，而非仅本地）、设置http端口为9200
sed -i 's/^#network.host:.*/network.host: 0.0.0.0/g' /etc/elasticsearch/elasticsearch.yml
sed -i 's/^#http.port:.*/http.port: 9200/g' /etc/elasticsearch/elasticsearch.yml

# 关闭es安全加密
sed -i 's/^xpack.security.enabled:.*/xpack.security.enabled: false/g' /etc/elasticsearch/elasticsearch.yml

# 调整JVM堆内存大小（初始值和最大值均设为2g），保证es有足够的内存
echo "-Xms2g" >> /etc/elasticsearch/jvm.options
echo "-Xmx2g" >> /etc/elasticsearch/jvm.options

# 设置es开机自启并立即启动
systemctl enable elasticsearch.service --now

# curl测试
curl -X GET localhost:9200
{
  "name" : "python",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "GuInJXNZQYy3kvsD1BHfiA",
  "version" : {
    "number" : "8.16.0",
    "build_flavor" : "default",
    "build_type" : "rpm",
    "build_hash" : "12ff76a92922609df4aba61a368e7adf65589749",
    "build_date" : "2024-11-08T10:05:56.292914697Z",
    "build_snapshot" : false,
    "lucene_version" : "9.12.0",
    "minimum_wire_compatibility_version" : "7.17.0",
    "minimum_index_compatibility_version" : "7.0.0"
  },
  "tagline" : "You Know, for Search"
}

es常用操作

# 查看插件信息
curl -X GET localhost:9200/_cat/plugins?v
name component version

# 查看分片信息
curl -X GET localhost:9200/_cat/shards?v
index shard prirep state docs store dataset ip node

# 查看索引列表
curl -X GET localhost:9200/_cat/indices?v
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size dataset.size

# 创建索引
curl -X PUT localhost:9200/my_index
#{"acknowledged":true,"shards_acknowledged":true,"index":"my_index"}

# 删除索引
curl -X DELETE localhost:9200/my_index
{"acknowledged":true}

# 插入文档——用json格式插入
curl -X POST "localhost:9200/articles/_doc/1" -H "Content-Type: application/json" -d '{"title": "这是我的第一篇文档","content":"这是第一篇文档的内容"}'

# 查询我刚刚插入的文档
curl http://192.168.10.114:9200/articles/_doc/1
{"_index":"articles","_id":"1","_version":1,"_seq_no":0,"_primary_term":1,"found":true,"_source":{"title": "这是我的第一篇文档","content":"这是第一篇文档的内容"}}

通过API方式检测es运行状态

import requests
import subprocess
import time

def check_es_status(url='http://192.168.10.114:9200/_cluster/health', timeout=5):
    """Probe Elasticsearch over HTTP and report whether it answered with 200.

    Args:
        url: Endpoint to request. Defaults to the cluster health API of the
            node this script was written for.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive node cannot block the monitor indefinitely.

    Returns:
        bool: True if the request succeeded with HTTP status 200, False on
        any request error or any other status code.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"es status error:{e}")
        return False

    if response.status_code == 200:
        print("es is running")
        return True

    # Return False explicitly rather than falling off the end with None.
    print(f"es status error:unexpected HTTP status {response.status_code}")
    return False

def restart_es():
    """Restart the elasticsearch unit through systemctl and print the outcome.

    Announces the attempt, then prints either a success message or the
    CalledProcessError raised when systemctl exits with a non-zero status.
    """
    print("trying to restart es")
    try:
        subprocess.run(['systemctl','restart','elasticsearch'],check=True)
    except subprocess.CalledProcessError as err:
        print(f"failed to restart es:{err}")
    else:
        print("es is restarting successfully")

if __name__ == '__main__':
    # Monitor loop: probe Elasticsearch and restart it when the probe fails.
    while True:
        if not check_es_status():
            restart_es()
        else:
            print("es is running")
        # Pause on every pass, including right after a restart. Without it
        # the next probe runs immediately and a failed probe triggers
        # another restart straight away, with no time for the service to
        # come up in between.
        time.sleep(3)