Bugfix RtLevel, software watchdog for PLC Program

RtLevel was not set on restart plc program after crash
Software watchdog will restart plc program, if it does not toggle core.wd
in the configured time. It is the same bit and function as on connect
device to trigger hardware watchdog (bit 7 in RevPiLED).
This commit is contained in:
2020-05-01 22:59:10 +02:00
parent d77fea0b48
commit beb073893b
5 changed files with 262 additions and 34 deletions

View File

@@ -2,6 +2,7 @@
<dictionary name="akira">
<words>
<w>ctory</w>
<w>popen</w>
</words>
</dictionary>
</component>

View File

@@ -1,13 +1,16 @@
# -*- coding: utf-8 -*-
"""Helperfunktionen fuer das gesamte RevPiPyLoad-System."""
__author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager"
__copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3"
import os
import proginit
from json import load, loads
from re import match as rematch
from subprocess import Popen, PIPE
import proginit
def _setuprt(pid, evt_exit):
"""Konfiguriert Programm fuer den RT-Scheduler.
@@ -117,6 +120,32 @@ def _zeroprocimg():
)
def get_revpiled_address(configrsc_bytes: bytes) -> int:
"""
Find byte address of revpiled output.
:return: Address or -1 on error
"""
try:
rsc = loads(configrsc_bytes.decode()) # type: dict
except Exception:
return -1
byte_address = -1
for device in rsc.get("Devices", ()): # type: dict
if device.get("type", "") == "BASE":
try:
byte_address = device["offset"] + int(device["out"]["0"][3])
proginit.logger.debug(
"found revpi_led_address on {0} byte".format(byte_address)
)
except Exception:
pass
break
return byte_address
def refullmatch(regex, string):
"""re.fullmatch wegen alter python version aus wheezy nachgebaut.

View File

@@ -1,18 +1,20 @@
# -*- coding: utf-8 -*-
"""Modul fuer die Verwaltung der PLC Funktionen."""
__author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager"
__copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3"
import os
import proginit
import shlex
import subprocess
from logsystem import PipeLogwriter
from helper import _setuprt, _zeroprocimg
from sys import stdout as sysstdout
from threading import Event, Thread
from time import sleep, asctime
import proginit
from helper import _setuprt, _zeroprocimg
from logsystem import PipeLogwriter
from watchdogs import SoftwareWatchdog
class RevPiPlc(Thread):
@@ -45,6 +47,16 @@ class RevPiPlc(Thread):
self.zeroonerror = False
self.zeroonexit = False
# Software watchdog
self.softdog = SoftwareWatchdog(0, 0)
def __exec_rtlevel(self):
"""RealTime Scheduler nutzen nach 5 Sekunden Programmvorlauf."""
if self.rtlevel > 0 \
and not self._evt_exit.wait(5) \
and self._procplc.poll() is None:
_setuprt(self._procplc.pid, self._evt_exit)
def __get_autoreloaddelay(self):
"""Getter fuer autoreloaddelay.
@return Delayzeit in Sekunden <class 'int'>"""
@@ -103,6 +115,11 @@ class RevPiPlc(Thread):
stderr=subprocess.STDOUT
)
proginit.logger.debug("leave RevPiPlc._spopen()")
# Pass process to watchdog
self.softdog.kill_process = sp
self.softdog.reset()
return sp
def newlogfile(self):
@@ -138,12 +155,7 @@ class RevPiPlc(Thread):
# Prozess erstellen
proginit.logger.info("start plc program {0}".format(self._program))
self._procplc = self._spopen(lst_proc)
# RealTime Scheduler nutzen nach 5 Sekunden Programmvorlauf
if self.rtlevel > 0 \
and not self._evt_exit.wait(5) \
and self._procplc.poll() is None:
_setuprt(self._procplc.pid, self._evt_exit)
self.__exec_rtlevel()
# Überwachung starten
while not self._evt_exit.is_set():
@@ -152,22 +164,11 @@ class RevPiPlc(Thread):
self.exitcode = self._procplc.poll()
if self.exitcode is not None:
# Do nothing, if delay for restart is running
if self._delaycounter == self._autoreloaddelay:
if self.exitcode > 0:
# PLC Python Programm abgestürzt
proginit.logger.error(
"plc program crashed - exitcode: {0}".format(
self.exitcode
)
)
if self.zeroonerror:
_zeroprocimg()
proginit.logger.warning(
"set piControl0 to ZERO after "
"PLC program error"
)
self.softdog.stop()
else:
if self.exitcode == 0:
# PLC Python Programm sauber beendet
proginit.logger.info("plc program did a clean exit")
if self.zeroonexit:
@@ -176,6 +177,18 @@ class RevPiPlc(Thread):
"set piControl0 to ZERO after "
"PLC program returns clean exitcode"
)
else:
# PLC Python Programm abgestürzt
proginit.logger.error(
"plc program crashed - exitcode: {0}"
"".format(self.exitcode)
)
if self.zeroonerror:
_zeroprocimg()
proginit.logger.warning(
"set piControl0 to ZERO after "
"PLC program error"
)
if not self._evt_exit.is_set() and self.autoreload:
self._delaycounter -= 1
@@ -192,6 +205,7 @@ class RevPiPlc(Thread):
proginit.logger.warning(
"restart plc program after crash"
)
self.__exec_rtlevel()
else:
break
@@ -211,6 +225,7 @@ class RevPiPlc(Thread):
proginit.logger.info("stop revpiplc thread")
self._evt_exit.set()
self.softdog.stop()
# Prüfen ob es einen subprocess gibt
if self._procplc is None:

View File

@@ -26,7 +26,7 @@ begrenzt werden!
"""
__author__ = "Sven Sager"
__copyright__ = "Copyright (C) 2018 Sven Sager"
__copyright__ = "Copyright (C) 2020 Sven Sager"
__license__ = "GPLv3"
__version__ = "0.8.5"
@@ -48,7 +48,7 @@ import logsystem
import picontrolserver
import plcsystem
import proginit
from helper import refullmatch
from helper import refullmatch, get_revpiled_address
from shared.ipaclmanager import IpAclManager
from watchdogs import ResetDriverWatchdog
from xrpcserver import SaveXMLRPCServer
@@ -81,6 +81,7 @@ class RevPiPyLoad():
self.pictorymtime = 0
self.replaceiosmtime = 0
self.replaceiofail = False
self.revpi_led_address = -1
# Berechtigungsmanger
if proginit.pargs.developermode:
@@ -219,8 +220,8 @@ class RevPiPyLoad():
"plcworkdir", ".")
self.plcprogram = self.globalconfig["DEFAULT"].get(
"plcprogram", "none.py")
self.plcprogram_watchdog = self.globalconfig["DEFAULT"].getboolean(
"plcprogram_watchdog", False)
self.plcprogram_watchdog = self.globalconfig["DEFAULT"].getint(
"plcprogram_watchdog", 0)
self.plcarguments = self.globalconfig["DEFAULT"].get(
"plcarguments", "")
self.plcworkdir_set_uid = self.globalconfig["DEFAULT"].getboolean(
@@ -374,6 +375,7 @@ class RevPiPyLoad():
)
self.plc.autoreload = self.autoreload
self.plc.autoreloaddelay = self.autoreloaddelay
self.plc.softdog.timeout = self.plcprogram_watchdog
self.plc.zeroonerror = self.zeroonerror
self.plc.zeroonexit = self.zeroonexit
@@ -547,7 +549,6 @@ class RevPiPyLoad():
"""Konfiguriert den PLC-Thread fuer die Ausfuehrung.
@return PLC-Thread Object or None"""
proginit.logger.debug("enter RevPiPyLoad._plcthread()")
th_plc = None
# Prüfen ob Programm existiert
if not os.path.exists(os.path.join(self.plcworkdir, self.plcprogram)):
@@ -556,6 +557,16 @@ class RevPiPyLoad():
))
return None
# Check software watchdog
if self.revpi_led_address < 0 < self.plcprogram_watchdog:
proginit.logger.error(
"can not start plc program, because watchdog is activated "
"but no address was found in piCtory configuration"
)
return None
th_plc = None
proginit.logger.debug("create PLC program watcher")
th_plc = plcsystem.RevPiPlc(
os.path.join(self.plcworkdir, self.plcprogram),
@@ -567,6 +578,9 @@ class RevPiPyLoad():
th_plc.gid = self.plcgid
th_plc.uid = self.plcuid
th_plc.rtlevel = self.rtlevel
th_plc.softdog.address = \
0 if self.revpi_led_address < 0 else self.revpi_led_address
th_plc.softdog.timeout = self.plcprogram_watchdog
th_plc.zeroonerror = self.zeroonerror
th_plc.zeroonexit = self.zeroonexit
@@ -628,7 +642,23 @@ class RevPiPyLoad():
# TODO: Nur "Devices" list vergleich da HASH immer neu wegen timestamp
with open(proginit.pargs.configrsc, "rb") as fh:
file_hash = md5(fh.read()).digest()
rsc_buff = fh.read()
# Check change of RevPiLED address
self.revpi_led_address = get_revpiled_address(rsc_buff)
if self.plc is not None and self.plc.is_alive():
if self.revpi_led_address >= 0:
self.plc.softdog.address = self.revpi_led_address
elif self.plcprogram_watchdog > 0:
# Stop plc program, if watchdog is needed but not found
proginit.logger.error(
"stop plc program, because watchdog is activated but "
"no address was found in piCtory configuration"
)
self.plc.stop()
self.plc.join()
file_hash = md5(rsc_buff).digest()
if picontrolserver.HASH_PICT == file_hash:
return False
picontrolserver.HASH_PICT = file_hash
@@ -1151,7 +1181,7 @@ class RevPiPyLoad():
"autoreloaddelay": "[0-9]+",
"autostart": "[01]",
"plcprogram": ".+",
"plcprogram_watchdog": "[01]",
"plcprogram_watchdog": "[0-9]+",
"plcarguments": ".*",
"plcworkdir_set_uid": "[01]",
# "plcuid": "[0-9]{,5}",

View File

@@ -6,11 +6,164 @@ __license__ = "GPLv3"
import os
from fcntl import ioctl
from threading import Thread
from random import random
from struct import pack, unpack
from subprocess import Popen
from threading import Thread, Event
from time import time
import proginit as pi
class SoftwareWatchdog:
def __init__(self, address: int, timeout: int, kill_process=None):
"""
Software watchdog thread, which must be recreate if triggered.
:param address: Byte address of RevPiLED byte
:param timeout: Timeout to trigger watchdog on no change of bit
:param kill_process: Process to kill on trigger
"""
self.__th = Thread()
self._exit = Event()
self._ioctl_bytes = b''
self._process = None
self._timeout = 0.0
self.triggered = False
# Process and check the values
self.address = address
self.kill_process = kill_process
# The timeout property will start/stop the thread
self.timeout = timeout
def __th_run(self):
"""Thread function for watchdog."""
pi.logger.debug("enter SoftwareWatchdog.__th_run()")
# Startup delay to let the python program start and trigger
# todo: Is this fix value okay?
if self._exit.wait(2.0):
pi.logger.debug("leave SoftwareWatchdog.__th_run()")
return
fd = os.open(pi.pargs.procimg, os.O_RDONLY)
mrk = self._ioctl_bytes
tmr = time()
# Random wait value 0.0-0.1 to become async to process image
while not self._exit.wait(random() / 10):
try:
# Get SoftWatchdog bit
bit_7 = ioctl(fd, 19215, self._ioctl_bytes)
except Exception:
pass
else:
if bit_7 != mrk:
# Toggling detected, wait the rest of time to free cpu
self._exit.wait(self._timeout - (time() - tmr))
mrk = bit_7
tmr = time()
continue
if time() - tmr >= self._timeout:
pi.logger.debug("software watchdog timeout reached")
self.triggered = True
if self._process is not None:
self._process.kill()
pi.logger.warning(
"process killed by software watchdog"
)
break
os.close(fd)
pi.logger.debug("leave SoftwareWatchdog.__th_run()")
def reset(self):
"""Reset watchdog functions after triggered or stopped."""
pi.logger.debug("enter SoftwareWatchdog.reset()")
self._exit.clear()
self.triggered = False
# The timeout property will start / stop the thread
self.timeout = int(self.timeout)
pi.logger.debug("leave SoftwareWatchdog.reset()")
def stop(self):
"""Shut down watchdog task and wait for exit."""
pi.logger.debug("enter SoftwareWatchdog.stop()")
self._exit.set()
if self.__th.is_alive():
self.__th.join()
pi.logger.debug("leave SoftwareWatchdog.stop()")
@property
def address(self) -> int:
"""Byte address of RevPiLED byte."""
return unpack("<Hxx", self._ioctl_bytes)[0]
@address.setter
def address(self, value: int) -> None:
"""Byte address of RevPiLED byte."""
if not isinstance(value, int):
raise TypeError("address must be <class 'int'>")
if not 0 <= value < 4096:
raise ValueError("address must be 0 - 4095")
# Use Bit 7 of RevPiLED byte (wd of Connect device)
self._ioctl_bytes = pack("<HBx", value, 7)
pi.logger.debug("set software watchdog address to {0}".format(value))
@property
def kill_process(self) -> Popen:
return self._process
@kill_process.setter
def kill_process(self, value: Popen) -> None:
if not (value is None or isinstance(value, Popen)):
raise TypeError("kill_process must be <class 'subprocess.Popen'>")
self._process = value
@property
def timeout(self) -> int:
"""Timeout to trigger watchdog on no change of bit."""
return int(self._timeout)
@timeout.setter
def timeout(self, value: int):
"""
Timeout to trigger watchdog on no change of bit.
Value in seconds, 0 will stop watchdog monitoring.
"""
if not isinstance(value, int):
raise TypeError("timeout must be <class 'int'>")
if value < 0:
raise ValueError(
"timeout value must be 0 to disable or a positive number"
)
if value == 0:
# A value of 0 will stop the watchdog thread
self.stop()
self._timeout = 0.0
else:
self._timeout = float(value)
if not (self.triggered or
self._exit.is_set() or self.__th.is_alive()):
self.__th = Thread(target=self.__th_run)
self.__th.start()
pi.logger.debug(
"set software watchdog timeout to {0} seconds"
"".format(value)
)
class ResetDriverWatchdog(Thread):
"""Watchdog to catch a piCtory reset_driver action."""