#!/usr/bin/python3

"""
    workspace++

    ws_expirer

    python version of ws_expirer command, only for root

    to be called from a cronjob to expire workspaces, does delete the data as well
    Reads new YAML configuration files and new YAML workspace database.

    (c) Holger Berger 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2022, 2023, 2024, 2026
    (c) Bernd Krischok 2017, 2020
    (c) Christoph Niethammer 2020
    (c) Michael Mueller, Martin Schroschk 2023, 2024

    workspace++ is based on workspace by Holger Berger, Thomas Beisel, Martin Hecht
    and Adrian Reber

    workspace++ is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    workspace++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with workspace++.  If not, see <http://www.gnu.org/licenses/>.

"""

import os, sys
import glob
import time
import smtplib
import os.path
import shutil
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import socket
import signal


# read a single line from ws.conf of the form: pythonpath: /path/to/python
def read_python_conf(conffile="/etc/ws.conf"):
    """Append the directory named by the ``pythonpath`` entry to ``sys.path``.

    The file is scanned line by line with plain string operations, so this
    works before the ``yaml`` module can be imported. An entry looks like::

        pythonpath: /path/to/python

    The first ``pythonpath`` entry that names an existing directory is
    appended to ``sys.path`` and scanning stops. An entry that does not name
    a directory produces a warning on stderr and scanning continues.

    Args:
        conffile: path of the configuration file to scan.

    Raises:
        OSError: if ``conffile`` cannot be opened.
    """
    with open(conffile, "r") as conf:
        for line in conf:
            # split at the first colon only, so a value containing ':' stays
            # intact and a line without any colon is simply skipped
            key, separator, value = line.partition(":")
            if not separator or key.strip() != "pythonpath":
                continue
            value = value.strip()
            if os.path.isdir(value):
                sys.path.append(value)
                break
            print("Warning: Invalid pythonpath in ws.conf", file=sys.stderr)


# read config first as it might change python path
read_python_conf()
# imported only now, after read_python_conf() had the chance to extend sys.path
import yaml


# counts the files unlinked by slowdeldir(); used there to insert extra pauses
# after every 10th and 100th file, and reset to 0 after every 100th
count = 0


class TimeOut(Exception):
    """Raised by the SIGALRM handler of this script to signal an elapsed time limit."""


def handler(signum, frame):
    """Signal handler that converts a delivered signal into a TimeOut exception.

    Both arguments are part of the signal handler calling convention and are
    not used here.

    Raises:
        TimeOut: always.
    """
    expired = TimeOut("end of time")
    raise expired


# send a reminder email
def send_reminder(smtphost, clustername, wsname, fsname, expiration, mailaddress):
    """Mail a reminder that a workspace is going to expire.

    The sender address is taken from the ``mail_from`` entry of the global
    ``config`` and falls back to ``wsadmin``. Delivery problems are reported
    on stdout and never raised, so a broken mail setup does not abort the
    expirer run.

    Args:
        smtphost: SMTP server the message is handed to.
        clustername: name of the HPC system, used in the message text.
        wsname: name of the workspace.
        fsname: name of the workspace filesystem.
        expiration: expiration time in seconds since the epoch.
        mailaddress: address of the recipient.
    """
    exptime = time.strftime("%a %b %d %H:%M:%S %Y %z", time.localtime(expiration))
    text = """
        Your workspace %s on filesystem %s at HPC system %s will expire at %s.
    """ % (
        wsname,
        fsname,
        clustername,
        exptime,
    )
    msg = MIMEMultipart("mixed")
    recipients = [mailaddress]
    # the sender address is optional in the config file
    sender = config.get("mail_from", "wsadmin")
    msg["From"] = sender
    msg["To"] = mailaddress
    msg["Subject"] = "Workspace %s will expire at %s" % (wsname, exptime)
    msg.preamble = text
    msg.attach(MIMEText(text, "html"))

    try:
        # the context manager issues QUIT and closes the connection even if
        # sendmail() raises, so no socket is left open on failure
        with smtplib.SMTP(host=smtphost) as s:
            s.sendmail(sender, recipients, msg.as_string())
    except smtplib.SMTPRecipientsRefused:
        print("Recipient refused: {}".format(str(recipients)))
    except smtplib.SMTPSenderRefused:
        print("Sender refused: {}".format(str(sender)))
    except socket.error:
        print("Socket error")
    except Exception as e:  # something else went wrong; keep the run alive but say why
        print("Could not send reminder email. Other reason.", recipients, e)


def senderrormail(text):
    """Mail an error notice to the administrators.

    Reads the module-level ``adminmail`` (recipients), ``smtphost`` and
    ``config`` (optional ``mail_from`` sender, default ``wsadmin``).
    Delivery problems are reported on stdout and never raised.

    Args:
        text: the notice; used as subject and as body of the mail.
    """
    # NOTE(review): the config schema is not visible here, so adminmail is
    # accepted both as a single address and as a list of addresses
    if isinstance(adminmail, str):
        recipients = [adminmail]
    else:
        recipients = [str(address) for address in adminmail or []]
    sender = config.get("mail_from", "wsadmin")

    msg = MIMEMultipart("mixed")
    msg["From"] = sender
    # a header value has to be a string; assigning a list makes
    # msg.as_string() fail and the mail would silently never be sent
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = text
    msg.preamble = text + "\n please check workspace DB entry and may be restore some meaningfull entry if workspace should not get deleted"
    msg.attach(MIMEText(text, "html"))

    try:
        # the context manager issues QUIT and closes the connection even if
        # sendmail() raises, so no socket is left open on failure
        with smtplib.SMTP(host=smtphost) as s:
            s.sendmail(sender, recipients, msg.as_string())
    except smtplib.SMTPRecipientsRefused:
        print("Recipient refused: {}".format(str(recipients)))
    except smtplib.SMTPSenderRefused:
        print("Sender refused: {}".format(str(sender)))
    except socket.error:
        print("Socket error")
    except Exception as e:  # something else went wrong; keep the run alive but say why
        print("Could not send error email. Other reason.", recipients, e)



# fast recursive deleter, using new python mechanisms
def fastdeldir(dir):
    """Delete the directory tree ``dir`` with a single shutil.rmtree call.

    A path that does not exist is reported and left alone. If the TimeOut
    exception arrives while working, the deletion is given up with a message.

    Args:
        dir: path of the directory tree to delete.
    """
    print("   deldir(fast)", dir)
    try:
        if os.path.exists(dir):
            shutil.rmtree(dir)
        else:
            print("Error: Path to delete does not exist: %s" % dir)
    except TimeOut:
        # f"" introduces python 3.6 dependency
        print(f"aborting fastdeldir of {dir} due to timelimit {deldir_timelimit}s")


# slow recursive deleter, to avoid high meta data pressure on servers
#  deprecated, has security impact
def slowdeldir(dir):
    """Empty the directory ``dir`` recursively, sleeping between operations.

    Deprecated (see the note above this function: it has security impact).
    The content of ``dir`` is removed entry by entry with short pauses to
    keep the metadata load low; ``dir`` itself is left in place for the
    caller to remove. A path that does not exist is ignored. If the TimeOut
    exception arrives while working, the deletion is given up with a message.

    Args:
        dir: path of the directory to empty.
    """
    global count
    print("   deldir(slow)", dir)
    try:
        # test before touching the path: chmod on a vanished directory would
        # raise FileNotFoundError and abort the whole run
        if not os.path.exists(dir):
            return
        os.chmod(dir, 0o000)
        for obj in os.listdir(dir):
            fullname = os.path.join(dir, obj)
            if os.path.isdir(fullname) and not os.path.islink(fullname):
                # NOTE(review): this re-applies the mode to the parent, not to
                # fullname; the recursive call sets the mode of fullname itself
                os.chmod(dir, 0o000)
                slowdeldir(fullname)
                time.sleep(0.01)
                os.rmdir(fullname)
                time.sleep(0.01)
            else:
                if os.path.isfile(fullname):
                    size = os.path.getsize(fullname)
                else:
                    size = 0.001
                os.unlink(fullname)
                # pause grows with the size of the removed file
                time.sleep(max(0.001, size * 0.00000000001))
                count += 1
                if count % 10 == 0:
                    time.sleep(0.05)
                if count % 100 == 0:
                    time.sleep(0.1)
                    count = 0
    except TimeOut:
        # f"" introduces python 3.6 dependency
        print(f"aborting slowdeldir of {dir} due to timelimit {deldir_timelimit}s")


# getting old workspace database informations (path and expiration date)
def get_old_db_entry_informations(dbfile):
    """Read a workspace DB entry stored in the old two-line format.

    The first line of the file is taken as expiration, the second line as
    workspace path; both are returned as strings without the line break.

    Args:
        dbfile: path of the DB entry file.

    Returns:
        A dict with the keys "expiration" and "workspace", or an empty dict
        if the file has fewer than two lines or reading fails (the problem
        is reported on stderr).

    Raises:
        OSError: if ``dbfile`` cannot be opened.
    """
    entry = {}
    with open(dbfile, "r") as stream:
        try:
            lines = stream.readlines()
            expiration_line = lines[0]
            workspace_line = lines[1]
            entry = {
                "expiration": expiration_line.partition("\n")[0],
                "workspace": workspace_line.partition("\n")[0],
            }
        except (IOError, IndexError) as e:
            print("Exception while trying to read {}. {}".format(dbfile, e), file=sys.stderr)
    return entry


# collect all the workspace paths of all db entries
def get_dbentriesws(dbfullpathlist):
    """Collect the workspace path stored in every given DB entry.

    Each entry is read as YAML first; if that does not yield a ``workspace``
    field, the old two-line format is tried. If neither works, an error mail
    is sent and the whole program is terminated, because going on without
    knowing the workspace of an entry risks data loss.

    Args:
        dbfullpathlist: iterable of paths of DB entry files.

    Returns:
        List of workspace paths, in the order of ``dbfullpathlist``.

    Raises:
        SystemExit: if an entry cannot be read in either format.
    """
    workspaces = []
    for dbentryfilename in dbfullpathlist:
        try:
            # close the entry file right after parsing instead of leaking the handle
            with open(dbentryfilename) as dbfile:
                dbentry = yaml.safe_load(dbfile)
            workspaces.append(dbentry["workspace"])
        except Exception:
            # not a usable YAML entry, fall back to the old two-line format
            try:
                dbentry = get_old_db_entry_informations(dbentryfilename)
                workspaces.append(dbentry["workspace"])
            except Exception:
                print("Empty DB entry {0}, bailing out to avoid data loss! Manual intervention required!".format(dbentryfilename), file=sys.stderr)
                senderrormail("empty workspace db entry "+dbentryfilename)
                sys.exit(-1)
    return workspaces


# Options Parsing ...
def vararg_callback(option, opt_str, value, parser):
    """optparse callback collecting a variable number of option arguments.

    Takes arguments from the remaining command line until one looks like an
    option ("--foo", "-a", "-fx", "--file=f", ...) and stores them as a list
    in the destination of ``option``. Note that this also stops at "-3" or
    "-3.0", so numeric values would need extra handling.
    """
    assert value is None
    pending = parser.rargs
    collected = []
    while pending:
        candidate = pending[0]
        looks_long = candidate.startswith("--") and len(candidate) > 2
        looks_short = candidate.startswith("-") and len(candidate) > 1 and candidate[1] != "-"
        if looks_long or looks_short:
            break
        collected.append(pending.pop(0))
    setattr(parser.values, option.dest, collected)


def processOpts():
    """Parse the command line and return the optparse values object.

    Known options:
        -w/--workspaces  list of workspace filesystems (stored in ``fslist``)
        -c/--cleaner     flag enabling the real cleanup (stored in ``cleaner``)
        -s/--space       single space to clean up (stored in ``single_space``)
    """
    from optparse import OptionParser

    cli = OptionParser()
    # (flags, settings) in the order the options are registered
    option_table = (
        (
            ("-w", "--workspaces"),
            {
                "action": "callback",
                "dest": "fslist",
                "callback": vararg_callback,
                "help": "pass a list of workspace filesystems to clean up (whitespace-separated)",
            },
        ),
        (
            ("-c", "--cleaner"),
            {
                "dest": "cleaner",
                "action": "store_true",
                "default": False,
                "help": "enable cleanup run (default is dry run)",
            },
        ),
        (
            ("-s", "--space"),
            {
                "action": "callback",
                "dest": "single_space",
                "callback": vararg_callback,
                "default": "",
                "help": "pass a single space (inside a workspace) to clean up",
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    parsed, _leftover = cli.parse_args()
    if not parsed:
        print("*** FATAL: No options defined. ***")
        sys.exit(1)

    return parsed


# only root may run the expirer
if os.getuid() != 0:
    print("Error: you are not root.", file=sys.stderr)
    sys.exit(-1)

# load config file; the context manager closes the handle right after parsing
with open("/etc/ws.conf") as conffile:
    config = yaml.safe_load(conffile)

# choose one of the two (the later assignment is the one in effect)
deldir = slowdeldir   # deprecated!
deldir = fastdeldir

# mandatory settings, a missing one aborts the run with a KeyError
smtphost = config["smtphost"]
adminmail = config["adminmail"]
clustername = config["clustername"]
# optional: value passed to signal.alarm() around each directory deletion
try:
    deldir_timelimit = config["deldir_timeout"]
except KeyError:
    print("Warning: no deldir_timeout in config, defaulting", file=sys.stderr)
    deldir_timelimit = 3600 * 24 * 365  # some huge default to not affect people not updating config file

# remember when the run began, for the report at the end
start = time.time()

print("start of expirer run", time.ctime())

# Get the command options
opts = processOpts()

# filesystems named on the command line win, otherwise all from the config are used
fslist = opts.fslist if opts.fslist else list(config["workspaces"])

# optional restriction to a single space: first argument of -s, if there is one
try:
    single_space = opts.single_space[0]
except IndexError:
    single_space = ""
else:
    print(single_space)

if not fslist:
    print("Error: no workspace defined")
    sys.exit(2)

# without -c/--cleaner nothing is changed, the actions are only printed
dryrun = not opts.cleaner
if dryrun:
    print("simulate cleaning ... (dryrun)")
else:
    print("really cleaning ...")

# register timeout handler
signal.signal(signal.SIGALRM, handler)

# cleanup stray directories, this removes stuff that was released (no DB entry any more)
# from spaces, and checks if anything is left over in removed state for whatever reasons
# this searches over workspaces and checks DB
for fs in fslist:
    # first for visible workspaces
    try:
        dbdir = config["workspaces"][fs]["database"]
    except KeyError:
        print("  FAILED to access", fs, "in config file")
        continue
    spaces = config["workspaces"][fs]["spaces"].copy()
    # avoid datarace, fetch directories first and db entries second (1),
    # so entries created during this run of expirer will be ignored
    #  this eats memory, but... generators from python3 pathlib.Path.glob
    #  would reintroduce the race.
    workspaces = {}
    for space in spaces:
        workspaces[space] = glob.glob(os.path.join(space, "*-*"))

    # check for magic in DB entry, to avoid that DB is not existing and all workspaces get wiped
    # by accident, e.g. due to mounting problems if DB is not in same FS.
    try:
        with open(os.path.join(dbdir, ".ws_db_magic")) as dbmagic:
            magic = dbmagic.readline().strip()
    except FileNotFoundError:
        notice = "DB directory {0} does not contain .ws_db_magic, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue
    if magic != fs:
        notice = "DB directory {0} does not contain .ws_db_magic with workspace name in it, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue

    dbentries = glob.glob(os.path.join(dbdir, "*-*"))
    # sets, as every workspace directory is looked up in them
    dbentrynames = set(map(os.path.basename, dbentries))  # (1)
    dbentriesws = get_dbentriesws(dbentries)
    dbentryworkspaces = set(map(os.path.basename, dbentriesws))
    workspacedelprefix = config["workspaces"][fs]["deleted"]
    if single_space != "":
        # a space given with -s belongs to some filesystems only; skip the
        # others instead of failing with ValueError in spaces.remove()
        if single_space not in spaces:
            print("  space", single_space, "is not configured for", fs, ", skipping")
            continue
        spaces.remove(single_space)
        print("PHASE: checking for stray workspaces for", fs, dbdir, single_space, ", ignoring ", spaces)
        spaces = [single_space]
    else:
        print("PHASE: checking for stray workspaces for", fs, dbdir, spaces)
    for space in spaces:
        for ws in workspaces[space]:  # (2) for for #87
            wsname = os.path.basename(ws)
            if wsname in dbentryworkspaces or wsname in dbentrynames:  # added for #139
                print("  valid workspace", ws)
                continue
            print("  stray workspace", ws)
            # FIXME: this could fail on scatefs, should fallback to 'mv'. Lustre DNE2 cross MDT renames work well meanwhile
            # FIXME: a stray workspace will be moved to deleted here, and will be deleted in
            # the same run in (3). Is this intended? dangerous with datarace #87
            timestamp = str(int(time.time()))
            target = os.path.join(os.path.dirname(ws), workspacedelprefix, wsname + "-" + timestamp)
            if dryrun:
                print("  MV", ws, target)
                continue
            try:
                os.rename(ws, target)
                print("  OS.RENAME", ws, target)
            except OSError:
                print("  OS.RENAME FAILED", ws, target)

    # second for removed workspaces (3)
    #  ws_release moves DB entry first, workspace second, should be race free
    dbdelentries = glob.glob(os.path.join(dbdir, workspacedelprefix, "*-*"))
    dbdelentrynames = set(map(os.path.basename, dbdelentries))
    for space in spaces:
        for ws in glob.glob(os.path.join(space, workspacedelprefix, "*-*")):
            if os.path.basename(ws) not in dbdelentrynames:
                print("  stray removed workspace", ws)
                if not dryrun:
                    signal.alarm(deldir_timelimit)
                    try:
                        deldir(ws)
                    finally:
                        # always disarm, so no alarm fires later in unrelated code
                        signal.alarm(0)
                else:
                    print("  DELDIR", ws)
            else:
                print("  valid removed workspace", ws)


# expire the workspaces by moving them into deleted spaces, dbentry + workspace itself
# this searches over DB, nothing not in DB will be touched
for fs in fslist:
    try:
        spaces = config["workspaces"][fs]["spaces"].copy()
    except KeyError:
        print("  FAILED to access", fs, "in config file")
        continue
    workspacedelprefix = config["workspaces"][fs]["deleted"]
    dbdir = config["workspaces"][fs]["database"]
    dbdeldir = os.path.join(dbdir, workspacedelprefix)

    # check for magic in DB entry, to avoid that DB is not existing and all workspaces get wiped
    # by accident, e.g. due to mounting problems if DB is not in same FS.
    try:
        with open(os.path.join(dbdir, ".ws_db_magic")) as dbmagic:
            magic = dbmagic.readline().strip()
    except FileNotFoundError:
        notice = "DB directory {0} does not contain .ws_db_magic, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue
    if magic != fs:
        notice = "DB directory {0} does not contain .ws_db_magic with workspace name in it, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue

    if single_space != "":
        # a space given with -s belongs to some filesystems only; skip the
        # others instead of failing with ValueError in spaces.remove()
        if single_space not in spaces:
            print("  space", single_space, "is not configured for", fs, ", skipping")
            continue
        spaces.remove(single_space)
        # "PHASE:" prefix added to match the headline of the other phases
        print("PHASE: checking for workspaces to be expired for", fs, dbdir, single_space, ", ignoring ", spaces)
        spaces = [single_space]
    else:
        print("PHASE: checking for workspaces to be expired for", fs, dbdir, spaces)
    for dbentryfilename in glob.glob(os.path.join(dbdir, "*-*")):
        reminder = 0
        mailaddress = ""
        workspace = ""
        expiration = 0
        dbentry = []
        try:
            # close the entry file right after parsing instead of leaking the handle
            with open(dbentryfilename) as dbfile:
                dbentry = yaml.safe_load(dbfile)
            reminder = int(dbentry["reminder"])
            mailaddress = dbentry["mailaddress"]
        except Exception:
            # no usable YAML entry, try the old two-line format, which has
            # neither reminder nor mail address
            try:
                dbentry = get_old_db_entry_informations(dbentryfilename)
                reminder = 0
                mailaddress = ""
            except Exception as e:
                print("   ERROR: could not read entry, ", e)

        # anything but a non-empty mapping (e.g. None from an empty file that
        # could not be re-read) cannot be evaluated below
        if not isinstance(dbentry, dict) or not dbentry:
            print("   ERROR, skipping empty db entry:", dbentryfilename)
            continue
            # FIXME: what happens with workspace if one exists that matches this empty DB entry? can only
            # be found in above file based search. this removes the directory as no DB entry matches
            # the path, but it would not remove the entry. entry will go stale, workspace will get deleted,

        try:
            expiration = int(dbentry["expiration"])
        except Exception:
            if "expiration" in dbentry:
                print("   ERROR in parsing expiration <", dbentry["expiration"], "> for", dbentryfilename)
            else:
                print("   ERROR missing field <expiration> for", dbentryfilename)
            continue
        # a missing or empty workspace field is reported below instead of raising KeyError
        workspace = dbentry.get("workspace", "")
        if not workspace or expiration == 0:
            print("  FAILED to parse DB for", dbentryfilename)
            continue
        if single_space != "" and single_space not in workspace:
            continue
        entryname = os.path.basename(dbentryfilename)
        if time.time() > expiration:
            print("  expiring", dbentryfilename, "  (expired", time.ctime(expiration), ")")
            timestamp = str(int(time.time()))
            # both targets are computed before anything is moved
            dbtarget = os.path.join(dbdeldir, entryname) + "-" + timestamp
            wstarget = os.path.join(os.path.dirname(workspace), workspacedelprefix, entryname + "-" + timestamp)
            if not dryrun:
                # DB entry first, workspace second
                os.rename(dbentryfilename, dbtarget)
                print("  OS.RENAME", dbentryfilename, dbtarget)
                # FIXME: this could fail on scatefs, should fallback to 'mv'
                # while true for scatefs, lustre DNE2 meanwhile handles cross MDT renames well
                try:
                    os.rename(workspace, wstarget)
                    print("  OS.RENAME", workspace, wstarget)
                except OSError:
                    print("  OS.RENAME FAILED", workspace, wstarget)
            else:
                print("  MV", dbentryfilename, dbtarget)
                print("  MV", workspace, wstarget)

        else:
            print("  keeping", dbentryfilename, "  (expires ", time.ctime(expiration), ")")
            # reminder is a number of days before expiration
            if time.time() > (expiration - (reminder * (24 * 3600))):
                # name of the workspace: DB entry name without everything up to the first "-"
                swsname = entryname[entryname.find("-") + 1 :]
                if not dryrun:
                    if mailaddress != "":
                        send_reminder(smtphost, clustername, swsname, fs, expiration, mailaddress)
                        print("  SEND_REMINDER", swsname, expiration, mailaddress)
                else:
                    print("  MAIL", swsname, expiration, mailaddress)


# delete the already expired workspaces which are over "keeptime" days old
# this searches over DB
for fs in fslist:
    try:
        spaces = config["workspaces"][fs]["spaces"].copy()
    except KeyError:
        print("  FAILED to access", fs, "in config file")
        continue
    dbdir = config["workspaces"][fs]["database"]

    # check for magic in DB entry, to avoid that DB is not existing and all workspaces get wiped
    # by accident, e.g. due to mounting problems if DB is not in same FS.
    try:
        with open(os.path.join(dbdir, ".ws_db_magic")) as dbmagic:
            magic = dbmagic.readline().strip()
    except FileNotFoundError:
        notice = "DB directory {0} does not contain .ws_db_magic, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue
    if magic != fs:
        notice = "DB directory {0} does not contain .ws_db_magic with workspace name in it, skipping to avoid data loss. Please check!".format(dbdir)
        print(notice, file=sys.stderr)
        senderrormail(notice)
        continue

    if single_space != "":
        # a space given with -s belongs to some filesystems only; skip the
        # others instead of failing with ValueError in spaces.remove()
        if single_space not in spaces:
            print("  space", single_space, "is not configured for", fs, ", skipping")
            continue
        spaces.remove(single_space)
        print("PHASE: checking for expired workspaces for", fs, dbdir, single_space, ", ignoring ", spaces)
        spaces = [single_space]
    else:
        print("PHASE: checking for expired workspaces for", fs, dbdir, spaces)
    workspacedelprefix = config["workspaces"][fs]["deleted"]
    dbdeldir = os.path.join(dbdir, workspacedelprefix)
    keeptime = config["workspaces"][fs]["keeptime"]  # in days, see the comparison below
    print("  keeptime:", keeptime)
    for dbentryfilename in glob.glob(os.path.join(dbdeldir, "*-*-*")):
        workspace = ""
        expiration = 0
        try:
            # close the entry file right after parsing instead of leaking the handle
            with open(dbentryfilename) as dbfile:
                dbentry = yaml.safe_load(dbfile)
            expiration = int(dbentry["expiration"])
            workspace = dbentry["workspace"]
        except Exception:
            # no usable YAML entry, try the old two-line format
            try:
                dbentry = get_old_db_entry_informations(dbentryfilename)
                expiration = int(dbentry["expiration"])
                workspace = dbentry["workspace"]
            except Exception as e:
                print("  ERROR: could not read db entry, ", e)
        if not workspace or expiration == 0:
            print("  FAILED to parse DB for", dbentryfilename)
            continue
        if single_space != "" and single_space not in workspace:
            continue
        # take time of release from filename
        released = dbentryfilename.split("-")[-1]
        try:
            expiration = int(released)
        except ValueError:
            print("   ERROR in parsing expiration <", released, "> for", dbentryfilename)
            continue

        # check if the entry was released or was expired
        try:
            was_released = dbentry["released"]
            # released before 2001, makes no sense, ignore
            if was_released < 1000000000:
                was_released = time.time() + 3600000  # time in future never reached
                print("  IGNORING RELEASED <", released, "> for", dbentryfilename)
        except Exception:
            # no usable "released" field in the entry
            was_released = time.time() + 3600000  # time in future never reached

        # directory of the workspace inside the deleted area, computed before
        # anything is removed
        wsdir = os.path.join(os.path.dirname(workspace), workspacedelprefix, os.path.basename(dbentryfilename))

        # delete after keeptime days, or one hour after an explicit release
        if (time.time() > (expiration + keeptime * 24 * 3600)) or (time.time() > (was_released + 3600)):
            if time.time() > (was_released + 3600):
                print("  deleting", dbentryfilename, "  (was released", time.ctime(was_released), ")")
            else:
                print("  deleting", dbentryfilename, "  (expired", time.ctime(expiration), ")")

            if not dryrun:
                # remove the DB entry
                os.unlink(dbentryfilename)
                print(" OS.UNLINK", dbentryfilename)
                # remove the workspace directory
                signal.alarm(deldir_timelimit)
                try:
                    deldir(wsdir)
                finally:
                    # always disarm, so no alarm fires later in unrelated code
                    signal.alarm(0)
                print("  DELDIR", wsdir)

                # best effort: fastdeldir removes the directory itself, so this
                # fails then; slowdeldir leaves the emptied directory behind
                try:
                    os.rmdir(wsdir)
                    print("  OS.RMDIR", wsdir)
                except OSError:
                    pass
            else:
                print("  DELDIR", dbentryfilename)
                print("  RM", wsdir)

        else:
            print(
                "  (keeping further restorable",
                dbentryfilename,
                "until",
                time.ctime(expiration + keeptime * 24 * 3600),
                ")",
            )


# report how long the whole run took
end = time.time()
print(
    "end of expirer run after ",
    end - start,
    "seconds at",
    time.ctime(),
)
