Bugfix RtLevel, software watchdog for PLC Program

RtLevel was not set on restart plc program after crash
Software watchdog will restart plc program, if it does not toggle core.wd
in the configured time. It is the same bit and function as on connect
device to trigger hardware watchdog (bit 7 in RevPiLED).
This commit is contained in:
2020-05-01 22:59:10 +02:00
parent d77fea0b48
commit beb073893b
5 changed files with 262 additions and 34 deletions

View File

@@ -2,6 +2,7 @@
<dictionary name="akira"> <dictionary name="akira">
<words> <words>
<w>ctory</w> <w>ctory</w>
<w>popen</w>
</words> </words>
</dictionary> </dictionary>
</component> </component>

View File

@@ -1,13 +1,16 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Helperfunktionen fuer das gesamte RevPiPyLoad-System.""" """Helperfunktionen fuer das gesamte RevPiPyLoad-System."""
__author__ = "Sven Sager" __author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager" __copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3" __license__ = "GPLv3"
import os import os
import proginit from json import load, loads
from re import match as rematch from re import match as rematch
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
import proginit
def _setuprt(pid, evt_exit): def _setuprt(pid, evt_exit):
"""Konfiguriert Programm fuer den RT-Scheduler. """Konfiguriert Programm fuer den RT-Scheduler.
@@ -117,6 +120,32 @@ def _zeroprocimg():
) )
def get_revpiled_address(configrsc_bytes: bytes) -> int:
"""
Find byte address of revpiled output.
:return: Address or -1 on error
"""
try:
rsc = loads(configrsc_bytes.decode()) # type: dict
except Exception:
return -1
byte_address = -1
for device in rsc.get("Devices", ()): # type: dict
if device.get("type", "") == "BASE":
try:
byte_address = device["offset"] + int(device["out"]["0"][3])
proginit.logger.debug(
"found revpi_led_address on {0} byte".format(byte_address)
)
except Exception:
pass
break
return byte_address
def refullmatch(regex, string): def refullmatch(regex, string):
"""re.fullmatch wegen alter python version aus wheezy nachgebaut. """re.fullmatch wegen alter python version aus wheezy nachgebaut.

View File

@@ -1,18 +1,20 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Modul fuer die Verwaltung der PLC Funktionen.""" """Modul fuer die Verwaltung der PLC Funktionen."""
__author__ = "Sven Sager" __author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager" __copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3" __license__ = "GPLv3"
import os import os
import proginit
import shlex import shlex
import subprocess import subprocess
from logsystem import PipeLogwriter
from helper import _setuprt, _zeroprocimg
from sys import stdout as sysstdout from sys import stdout as sysstdout
from threading import Event, Thread from threading import Event, Thread
from time import sleep, asctime from time import sleep, asctime
import proginit
from helper import _setuprt, _zeroprocimg
from logsystem import PipeLogwriter
from watchdogs import SoftwareWatchdog
class RevPiPlc(Thread): class RevPiPlc(Thread):
@@ -45,6 +47,16 @@ class RevPiPlc(Thread):
self.zeroonerror = False self.zeroonerror = False
self.zeroonexit = False self.zeroonexit = False
# Software watchdog
self.softdog = SoftwareWatchdog(0, 0)
def __exec_rtlevel(self):
"""RealTime Scheduler nutzen nach 5 Sekunden Programmvorlauf."""
if self.rtlevel > 0 \
and not self._evt_exit.wait(5) \
and self._procplc.poll() is None:
_setuprt(self._procplc.pid, self._evt_exit)
def __get_autoreloaddelay(self): def __get_autoreloaddelay(self):
"""Getter fuer autoreloaddelay. """Getter fuer autoreloaddelay.
@return Delayzeit in Sekunden <class 'int'>""" @return Delayzeit in Sekunden <class 'int'>"""
@@ -103,6 +115,11 @@ class RevPiPlc(Thread):
stderr=subprocess.STDOUT stderr=subprocess.STDOUT
) )
proginit.logger.debug("leave RevPiPlc._spopen()") proginit.logger.debug("leave RevPiPlc._spopen()")
# Pass process to watchdog
self.softdog.kill_process = sp
self.softdog.reset()
return sp return sp
def newlogfile(self): def newlogfile(self):
@@ -138,12 +155,7 @@ class RevPiPlc(Thread):
# Prozess erstellen # Prozess erstellen
proginit.logger.info("start plc program {0}".format(self._program)) proginit.logger.info("start plc program {0}".format(self._program))
self._procplc = self._spopen(lst_proc) self._procplc = self._spopen(lst_proc)
self.__exec_rtlevel()
# RealTime Scheduler nutzen nach 5 Sekunden Programmvorlauf
if self.rtlevel > 0 \
and not self._evt_exit.wait(5) \
and self._procplc.poll() is None:
_setuprt(self._procplc.pid, self._evt_exit)
# Überwachung starten # Überwachung starten
while not self._evt_exit.is_set(): while not self._evt_exit.is_set():
@@ -152,22 +164,11 @@ class RevPiPlc(Thread):
self.exitcode = self._procplc.poll() self.exitcode = self._procplc.poll()
if self.exitcode is not None: if self.exitcode is not None:
# Do nothing, if delay for restart is running
if self._delaycounter == self._autoreloaddelay: if self._delaycounter == self._autoreloaddelay:
if self.exitcode > 0: self.softdog.stop()
# PLC Python Programm abgestürzt
proginit.logger.error(
"plc program crashed - exitcode: {0}".format(
self.exitcode
)
)
if self.zeroonerror:
_zeroprocimg()
proginit.logger.warning(
"set piControl0 to ZERO after "
"PLC program error"
)
else: if self.exitcode == 0:
# PLC Python Programm sauber beendet # PLC Python Programm sauber beendet
proginit.logger.info("plc program did a clean exit") proginit.logger.info("plc program did a clean exit")
if self.zeroonexit: if self.zeroonexit:
@@ -176,6 +177,18 @@ class RevPiPlc(Thread):
"set piControl0 to ZERO after " "set piControl0 to ZERO after "
"PLC program returns clean exitcode" "PLC program returns clean exitcode"
) )
else:
# PLC Python Programm abgestürzt
proginit.logger.error(
"plc program crashed - exitcode: {0}"
"".format(self.exitcode)
)
if self.zeroonerror:
_zeroprocimg()
proginit.logger.warning(
"set piControl0 to ZERO after "
"PLC program error"
)
if not self._evt_exit.is_set() and self.autoreload: if not self._evt_exit.is_set() and self.autoreload:
self._delaycounter -= 1 self._delaycounter -= 1
@@ -192,6 +205,7 @@ class RevPiPlc(Thread):
proginit.logger.warning( proginit.logger.warning(
"restart plc program after crash" "restart plc program after crash"
) )
self.__exec_rtlevel()
else: else:
break break
@@ -211,6 +225,7 @@ class RevPiPlc(Thread):
proginit.logger.info("stop revpiplc thread") proginit.logger.info("stop revpiplc thread")
self._evt_exit.set() self._evt_exit.set()
self.softdog.stop()
# Prüfen ob es einen subprocess gibt # Prüfen ob es einen subprocess gibt
if self._procplc is None: if self._procplc is None:

View File

@@ -26,7 +26,7 @@ begrenzt werden!
""" """
__author__ = "Sven Sager" __author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager" __copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3" __license__ = "GPLv3"
__version__ = "0.8.5" __version__ = "0.8.5"
@@ -48,7 +48,7 @@ import logsystem
import picontrolserver import picontrolserver
import plcsystem import plcsystem
import proginit import proginit
from helper import refullmatch from helper import refullmatch, get_revpiled_address
from shared.ipaclmanager import IpAclManager from shared.ipaclmanager import IpAclManager
from watchdogs import ResetDriverWatchdog from watchdogs import ResetDriverWatchdog
from xrpcserver import SaveXMLRPCServer from xrpcserver import SaveXMLRPCServer
@@ -81,6 +81,7 @@ class RevPiPyLoad():
self.pictorymtime = 0 self.pictorymtime = 0
self.replaceiosmtime = 0 self.replaceiosmtime = 0
self.replaceiofail = False self.replaceiofail = False
self.revpi_led_address = -1
# Berechtigungsmanger # Berechtigungsmanger
if proginit.pargs.developermode: if proginit.pargs.developermode:
@@ -219,8 +220,8 @@ class RevPiPyLoad():
"plcworkdir", ".") "plcworkdir", ".")
self.plcprogram = self.globalconfig["DEFAULT"].get( self.plcprogram = self.globalconfig["DEFAULT"].get(
"plcprogram", "none.py") "plcprogram", "none.py")
self.plcprogram_watchdog = self.globalconfig["DEFAULT"].getboolean( self.plcprogram_watchdog = self.globalconfig["DEFAULT"].getint(
"plcprogram_watchdog", False) "plcprogram_watchdog", 0)
self.plcarguments = self.globalconfig["DEFAULT"].get( self.plcarguments = self.globalconfig["DEFAULT"].get(
"plcarguments", "") "plcarguments", "")
self.plcworkdir_set_uid = self.globalconfig["DEFAULT"].getboolean( self.plcworkdir_set_uid = self.globalconfig["DEFAULT"].getboolean(
@@ -374,6 +375,7 @@ class RevPiPyLoad():
) )
self.plc.autoreload = self.autoreload self.plc.autoreload = self.autoreload
self.plc.autoreloaddelay = self.autoreloaddelay self.plc.autoreloaddelay = self.autoreloaddelay
self.plc.softdog.timeout = self.plcprogram_watchdog
self.plc.zeroonerror = self.zeroonerror self.plc.zeroonerror = self.zeroonerror
self.plc.zeroonexit = self.zeroonexit self.plc.zeroonexit = self.zeroonexit
@@ -547,7 +549,6 @@ class RevPiPyLoad():
"""Konfiguriert den PLC-Thread fuer die Ausfuehrung. """Konfiguriert den PLC-Thread fuer die Ausfuehrung.
@return PLC-Thread Object or None""" @return PLC-Thread Object or None"""
proginit.logger.debug("enter RevPiPyLoad._plcthread()") proginit.logger.debug("enter RevPiPyLoad._plcthread()")
th_plc = None
# Prüfen ob Programm existiert # Prüfen ob Programm existiert
if not os.path.exists(os.path.join(self.plcworkdir, self.plcprogram)): if not os.path.exists(os.path.join(self.plcworkdir, self.plcprogram)):
@@ -556,6 +557,16 @@ class RevPiPyLoad():
)) ))
return None return None
# Check software watchdog
if self.revpi_led_address < 0 < self.plcprogram_watchdog:
proginit.logger.error(
"can not start plc program, because watchdog is activated "
"but no address was found in piCtory configuration"
)
return None
th_plc = None
proginit.logger.debug("create PLC program watcher") proginit.logger.debug("create PLC program watcher")
th_plc = plcsystem.RevPiPlc( th_plc = plcsystem.RevPiPlc(
os.path.join(self.plcworkdir, self.plcprogram), os.path.join(self.plcworkdir, self.plcprogram),
@@ -567,6 +578,9 @@ class RevPiPyLoad():
th_plc.gid = self.plcgid th_plc.gid = self.plcgid
th_plc.uid = self.plcuid th_plc.uid = self.plcuid
th_plc.rtlevel = self.rtlevel th_plc.rtlevel = self.rtlevel
th_plc.softdog.address = \
0 if self.revpi_led_address < 0 else self.revpi_led_address
th_plc.softdog.timeout = self.plcprogram_watchdog
th_plc.zeroonerror = self.zeroonerror th_plc.zeroonerror = self.zeroonerror
th_plc.zeroonexit = self.zeroonexit th_plc.zeroonexit = self.zeroonexit
@@ -628,7 +642,23 @@ class RevPiPyLoad():
# TODO: Nur "Devices" list vergleich da HASH immer neu wegen timestamp # TODO: Nur "Devices" list vergleich da HASH immer neu wegen timestamp
with open(proginit.pargs.configrsc, "rb") as fh: with open(proginit.pargs.configrsc, "rb") as fh:
file_hash = md5(fh.read()).digest() rsc_buff = fh.read()
# Check change of RevPiLED address
self.revpi_led_address = get_revpiled_address(rsc_buff)
if self.plc is not None and self.plc.is_alive():
if self.revpi_led_address >= 0:
self.plc.softdog.address = self.revpi_led_address
elif self.plcprogram_watchdog > 0:
# Stop plc program, if watchdog is needed but not found
proginit.logger.error(
"stop plc program, because watchdog is activated but "
"no address was found in piCtory configuration"
)
self.plc.stop()
self.plc.join()
file_hash = md5(rsc_buff).digest()
if picontrolserver.HASH_PICT == file_hash: if picontrolserver.HASH_PICT == file_hash:
return False return False
picontrolserver.HASH_PICT = file_hash picontrolserver.HASH_PICT = file_hash
@@ -1151,7 +1181,7 @@ class RevPiPyLoad():
"autoreloaddelay": "[0-9]+", "autoreloaddelay": "[0-9]+",
"autostart": "[01]", "autostart": "[01]",
"plcprogram": ".+", "plcprogram": ".+",
"plcprogram_watchdog": "[01]", "plcprogram_watchdog": "[0-9]+",
"plcarguments": ".*", "plcarguments": ".*",
"plcworkdir_set_uid": "[01]", "plcworkdir_set_uid": "[01]",
# "plcuid": "[0-9]{,5}", # "plcuid": "[0-9]{,5}",

View File

@@ -6,11 +6,164 @@ __license__ = "GPLv3"
import os import os
from fcntl import ioctl from fcntl import ioctl
from threading import Thread from random import random
from struct import pack, unpack
from subprocess import Popen
from threading import Thread, Event
from time import time
import proginit as pi import proginit as pi
class SoftwareWatchdog:
def __init__(self, address: int, timeout: int, kill_process=None):
"""
Software watchdog thread, which must be recreate if triggered.
:param address: Byte address of RevPiLED byte
:param timeout: Timeout to trigger watchdog on no change of bit
:param kill_process: Process to kill on trigger
"""
self.__th = Thread()
self._exit = Event()
self._ioctl_bytes = b''
self._process = None
self._timeout = 0.0
self.triggered = False
# Process and check the values
self.address = address
self.kill_process = kill_process
# The timeout property will start/stop the thread
self.timeout = timeout
def __th_run(self):
"""Thread function for watchdog."""
pi.logger.debug("enter SoftwareWatchdog.__th_run()")
# Startup delay to let the python program start and trigger
# todo: Is this fix value okay?
if self._exit.wait(2.0):
pi.logger.debug("leave SoftwareWatchdog.__th_run()")
return
fd = os.open(pi.pargs.procimg, os.O_RDONLY)
mrk = self._ioctl_bytes
tmr = time()
# Random wait value 0.0-0.1 to become async to process image
while not self._exit.wait(random() / 10):
try:
# Get SoftWatchdog bit
bit_7 = ioctl(fd, 19215, self._ioctl_bytes)
except Exception:
pass
else:
if bit_7 != mrk:
# Toggling detected, wait the rest of time to free cpu
self._exit.wait(self._timeout - (time() - tmr))
mrk = bit_7
tmr = time()
continue
if time() - tmr >= self._timeout:
pi.logger.debug("software watchdog timeout reached")
self.triggered = True
if self._process is not None:
self._process.kill()
pi.logger.warning(
"process killed by software watchdog"
)
break
os.close(fd)
pi.logger.debug("leave SoftwareWatchdog.__th_run()")
def reset(self):
"""Reset watchdog functions after triggered or stopped."""
pi.logger.debug("enter SoftwareWatchdog.reset()")
self._exit.clear()
self.triggered = False
# The timeout property will start / stop the thread
self.timeout = int(self.timeout)
pi.logger.debug("leave SoftwareWatchdog.reset()")
def stop(self):
"""Shut down watchdog task and wait for exit."""
pi.logger.debug("enter SoftwareWatchdog.stop()")
self._exit.set()
if self.__th.is_alive():
self.__th.join()
pi.logger.debug("leave SoftwareWatchdog.stop()")
@property
def address(self) -> int:
"""Byte address of RevPiLED byte."""
return unpack("<Hxx", self._ioctl_bytes)[0]
@address.setter
def address(self, value: int) -> None:
"""Byte address of RevPiLED byte."""
if not isinstance(value, int):
raise TypeError("address must be <class 'int'>")
if not 0 <= value < 4096:
raise ValueError("address must be 0 - 4095")
# Use Bit 7 of RevPiLED byte (wd of Connect device)
self._ioctl_bytes = pack("<HBx", value, 7)
pi.logger.debug("set software watchdog address to {0}".format(value))
@property
def kill_process(self) -> Popen:
return self._process
@kill_process.setter
def kill_process(self, value: Popen) -> None:
if not (value is None or isinstance(value, Popen)):
raise TypeError("kill_process must be <class 'subprocess.Popen'>")
self._process = value
@property
def timeout(self) -> int:
"""Timeout to trigger watchdog on no change of bit."""
return int(self._timeout)
@timeout.setter
def timeout(self, value: int):
"""
Timeout to trigger watchdog on no change of bit.
Value in seconds, 0 will stop watchdog monitoring.
"""
if not isinstance(value, int):
raise TypeError("timeout must be <class 'int'>")
if value < 0:
raise ValueError(
"timeout value must be 0 to disable or a positive number"
)
if value == 0:
# A value of 0 will stop the watchdog thread
self.stop()
self._timeout = 0.0
else:
self._timeout = float(value)
if not (self.triggered or
self._exit.is_set() or self.__th.is_alive()):
self.__th = Thread(target=self.__th_run)
self.__th.start()
pi.logger.debug(
"set software watchdog timeout to {0} seconds"
"".format(value)
)
class ResetDriverWatchdog(Thread): class ResetDriverWatchdog(Thread):
"""Watchdog to catch a piCtory reset_driver action.""" """Watchdog to catch a piCtory reset_driver action."""