ScoDoc/app/scodoc/sco_archives.py

317 lines
12 KiB
Python
Raw Permalink Normal View History

2020-09-26 16:19:37 +02:00
# -*- mode: python -*-
# -*- coding: utf-8 -*-
##############################################################################
#
# Gestion scolarite IUT
#
2023-12-31 23:04:06 +01:00
# Copyright (c) 1999 - 2024 Emmanuel Viennet. All rights reserved.
2020-09-26 16:19:37 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################
"""ScoDoc : gestion des archives des PV et bulletins, et des dossiers etudiants (admission)

Archives are plain files, stored in
    <SCODOC_VAR_DIR>/archives/<dept_id>
(where <SCODOC_VAR_DIR> is usually /opt/scodoc-data, and <dept_id> is a department id (int)).

Jury reports (PV) and their associated documents are stored in a sub-directory of the form
    <archivedir>/<dept>/<formsemestre_id>/<YYYY-MM-DD-HH-MM-SS>
(formsemestre_id is here FormSemestre.id)

Documents attached to a student are stored in
    <archivedir>/docetuds/<dept_id>/<etudid>/<YYYY-MM-DD-HH-MM-SS>
(etudid is here Identite.id)

Apogée templates ("maquettes") used to export grades are stored in
    <archivedir>/apo_csv/<dept_id>/<annee_scolaire>-<sem_id>/<YYYY-MM-DD-HH-MM-SS>/<code_etape>.csv

An archive directory contains arbitrary files, plus a text file named
_description.txt holding a free-form, human-readable description of the archive.
"""
2021-02-05 18:21:34 +01:00
import datetime
import glob
2024-04-15 03:21:32 +02:00
import gzip
import mimetypes
import os
2021-02-04 20:02:44 +01:00
import re
2020-09-26 16:19:37 +02:00
import shutil
import time
2024-04-15 03:21:32 +02:00
import zlib
import chardet
2020-09-26 16:19:37 +02:00
2023-12-22 15:24:13 +01:00
from flask import g
2021-08-01 10:16:16 +02:00
import app.scodoc.sco_utils as scu
2021-05-29 18:22:51 +02:00
from config import Config
2023-12-22 15:24:13 +01:00
from app import log
2024-04-04 11:23:26 +02:00
from app.scodoc.sco_exceptions import ScoException, ScoValueError
2020-09-26 16:19:37 +02:00
class BaseArchiver:
2023-12-22 15:24:13 +01:00
"""Classe de base pour tous les archivers"""
2020-09-26 16:19:37 +02:00
def __init__(self, archive_type=""):
2021-08-29 19:57:32 +02:00
self.archive_type = archive_type
self.initialized = False
self.root = None
self.dept_id = None
def set_dept_id(self, dept_id: int):
"set dept"
self.dept_id = dept_id
2021-08-29 19:57:32 +02:00
def initialize(self, dept_id: int = None):
"""Fixe le département et initialise les répertoires au besoin."""
# Set departement (à chaque fois car peut changer d'une utilisation à l'autre)
self.dept_id = getattr(g, "scodoc_dept_id") if dept_id is None else dept_id
2021-08-29 19:57:32 +02:00
if self.initialized:
return
dirs = [Config.SCODOC_VAR_DIR, "archives"]
2021-08-29 19:57:32 +02:00
if self.archive_type:
dirs.append(self.archive_type)
self.root = os.path.join(*dirs) # /opt/scodoc-data/archives/<type>
2020-09-26 16:19:37 +02:00
log("initialized archiver, path=" + self.root)
path = dirs[0]
2022-04-06 18:51:01 +02:00
for directory in dirs[1:]:
path = os.path.join(path, directory)
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
2020-09-26 16:19:37 +02:00
if not os.path.isdir(path):
2022-12-27 00:13:34 +01:00
log(f"creating directory {path}")
2020-09-26 16:19:37 +02:00
os.mkdir(path)
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2021-08-29 19:57:32 +02:00
self.initialized = True
2020-09-26 16:19:37 +02:00
def get_obj_dir(self, oid: int, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""
:return: path to directory of archives for this object (eg formsemestre_id or etudid).
If directory does not yet exist, create it.
"""
self.initialize(dept_id)
dept_dir = os.path.join(self.root, str(self.dept_id))
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
2020-09-26 16:19:37 +02:00
if not os.path.isdir(dept_dir):
2022-04-06 18:51:01 +02:00
log(f"creating directory {dept_dir}")
2020-09-26 16:19:37 +02:00
os.mkdir(dept_dir)
2021-08-10 17:12:10 +02:00
obj_dir = os.path.join(dept_dir, str(oid))
2020-09-26 16:19:37 +02:00
if not os.path.isdir(obj_dir):
2022-04-06 18:51:01 +02:00
log(f"creating directory {obj_dir}")
2020-09-26 16:19:37 +02:00
os.mkdir(obj_dir)
2023-05-30 22:23:19 +02:00
except FileExistsError as exc:
raise ScoException(
f"""BaseArchiver error: obj_dir={obj_dir} exists={
os.path.exists(obj_dir)
} isdir={os.path.isdir(obj_dir)}"""
) from exc
2020-09-26 16:19:37 +02:00
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2020-09-26 16:19:37 +02:00
return obj_dir
def list_oids(self, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""
:return: list of archive oids
"""
self.initialize(dept_id)
base = os.path.join(self.root, str(self.dept_id)) + os.path.sep
2020-09-26 16:19:37 +02:00
dirs = glob.glob(base + "*")
return [os.path.split(x)[1] for x in dirs]
def list_obj_archives(self, oid: int, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""Returns
:return: list of archive identifiers for this object (paths to non empty dirs)
"""
self.initialize(dept_id)
base = self.get_obj_dir(oid, dept_id=dept_id) + os.path.sep
2020-09-26 16:19:37 +02:00
dirs = glob.glob(
base
+ "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9]"
)
2021-07-12 15:13:10 +02:00
dirs = [os.path.join(base, d) for d in dirs]
2020-09-26 16:19:37 +02:00
dirs = [d for d in dirs if os.path.isdir(d) and os.listdir(d)] # non empty dirs
dirs.sort()
return dirs
def delete_archive(self, archive_id: str, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""Delete (forever) this archive"""
self.initialize(dept_id)
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
2020-09-26 16:19:37 +02:00
shutil.rmtree(archive_id, ignore_errors=True)
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2020-09-26 16:19:37 +02:00
2022-12-27 00:13:34 +01:00
def get_archive_date(self, archive_id: str):
2020-09-26 16:19:37 +02:00
"""Returns date (as a DateTime object) of an archive"""
2022-04-06 18:51:01 +02:00
return datetime.datetime(
*[int(x) for x in os.path.split(archive_id)[1].split("-")]
)
2020-09-26 16:19:37 +02:00
def list_archive(self, archive_id: str, dept_id: int = None) -> str:
2020-09-26 16:19:37 +02:00
"""Return list of filenames (without path) in archive"""
self.initialize(dept_id)
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
2020-09-26 16:19:37 +02:00
files = os.listdir(archive_id)
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2020-09-26 16:19:37 +02:00
files.sort()
2021-08-10 17:12:10 +02:00
return [f for f in files if f and f[0] != "_"]
2020-09-26 16:19:37 +02:00
2022-12-27 00:13:34 +01:00
def get_archive_name(self, archive_id: str):
2020-09-26 16:19:37 +02:00
"""name identifying archive, to be used in web URLs"""
return os.path.split(archive_id)[1]
2022-12-27 00:13:34 +01:00
def is_valid_archive_name(self, archive_name: str):
2020-09-26 16:19:37 +02:00
"""check if name is valid."""
return re.match(
"^[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}-[0-9]{2}$", archive_name
)
def get_id_from_name(self, oid, archive_name: str, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""returns archive id (check that name is valid)"""
self.initialize(dept_id)
2020-09-26 16:19:37 +02:00
if not self.is_valid_archive_name(archive_name):
2022-09-14 22:47:31 +02:00
raise ScoValueError(f"Archive {archive_name} introuvable")
archive_id = os.path.join(self.get_obj_dir(oid, dept_id=dept_id), archive_name)
2020-09-26 16:19:37 +02:00
if not os.path.isdir(archive_id):
log(
2022-04-06 18:51:01 +02:00
f"invalid archive name: {archive_name}, oid={oid}, archive_id={archive_id}"
2020-09-26 16:19:37 +02:00
)
2022-09-13 10:50:13 +02:00
raise ScoValueError(f"Archive {archive_name} introuvable")
2020-09-26 16:19:37 +02:00
return archive_id
def get_archive_description(self, archive_id: str, dept_id: int = None) -> str:
2020-09-26 16:19:37 +02:00
"""Return description of archive"""
self.initialize(dept_id)
2021-12-12 16:53:52 +01:00
filename = os.path.join(archive_id, "_description.txt")
try:
with open(filename, encoding=scu.SCO_ENCODING) as f:
2021-12-12 16:53:52 +01:00
descr = f.read()
except UnicodeDecodeError:
# some (old) files may have saved under exotic encodings
with open(filename, "rb") as f:
data = f.read()
descr = data.decode(chardet.detect(data)["encoding"])
return descr
2020-09-26 16:19:37 +02:00
def create_obj_archive(self, oid: int, description: str, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""Creates a new archive for this object and returns its id."""
# id suffixé par YYYY-MM-DD-hh-mm-ss
2020-09-26 16:19:37 +02:00
archive_id = (
self.get_obj_dir(oid, dept_id=dept_id)
2020-09-26 16:19:37 +02:00
+ os.path.sep
2023-03-17 17:07:03 +01:00
+ "-".join([f"{x:02d}" for x in time.localtime()[:6]])
2020-09-26 16:19:37 +02:00
)
2022-04-06 18:51:01 +02:00
log(f"creating archive: {archive_id}")
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
os.mkdir(archive_id)
except FileExistsError: # directory already exists !
pass
2020-09-26 16:19:37 +02:00
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2022-06-30 09:27:46 +02:00
self.store(archive_id, "_description.txt", description)
2020-09-26 16:19:37 +02:00
return archive_id
def store(
self,
archive_id: str,
filename: str,
2023-09-12 19:57:39 +02:00
data: str | bytes,
dept_id: int = None,
2024-04-15 03:21:32 +02:00
compress=False,
):
2020-10-14 15:28:09 +02:00
"""Store data in archive, under given filename.
2020-09-26 16:19:37 +02:00
Filename may be modified (sanitized): return used filename
The file is created or replaced.
2022-06-30 09:27:46 +02:00
data may be str or bytes
2024-04-15 03:21:32 +02:00
If compress, data is gziped and filename suffix ".gz" added.
2020-09-26 16:19:37 +02:00
"""
2022-06-30 09:27:46 +02:00
if isinstance(data, str):
data = data.encode(scu.SCO_ENCODING)
self.initialize(dept_id)
2021-02-04 20:02:44 +01:00
filename = scu.sanitize_filename(filename)
2022-12-27 00:13:34 +01:00
log(f"storing {filename} ({len(data)} bytes) in {archive_id}")
2020-09-26 16:19:37 +02:00
try:
2021-02-04 20:02:44 +01:00
scu.GSL.acquire()
2020-09-26 16:19:37 +02:00
fname = os.path.join(archive_id, filename)
2024-04-15 03:21:32 +02:00
if compress:
if not fname.endswith(".gz"):
fname += ".gz"
with gzip.open(fname, "wb") as f:
f.write(data)
else:
with open(fname, "wb") as f:
f.write(data)
2023-09-18 22:45:45 +02:00
except FileNotFoundError as exc:
raise ScoValueError(
f"Erreur stockage archive (dossier inexistant, chemin {fname})"
) from exc
2020-09-26 16:19:37 +02:00
finally:
2021-02-04 20:02:44 +01:00
scu.GSL.release()
2020-09-26 16:19:37 +02:00
return filename
def get(self, archive_id: str, filename: str, dept_id: int = None):
2020-09-26 16:19:37 +02:00
"""Retreive data"""
self.initialize(dept_id)
2021-02-04 20:02:44 +01:00
if not scu.is_valid_filename(filename):
2022-12-27 00:13:34 +01:00
log(f"""Archiver.get: invalid filename '{filename}'""")
2022-03-04 18:55:45 +01:00
raise ScoValueError("archive introuvable (déjà supprimée ?)")
2020-09-26 16:19:37 +02:00
fname = os.path.join(archive_id, filename)
2022-12-27 00:13:34 +01:00
log(f"reading archive file {fname}")
2023-09-18 22:45:45 +02:00
try:
2024-04-15 03:21:32 +02:00
if fname.endswith(".gz"):
try:
with gzip.open(fname) as f:
data = f.read()
except (OSError, EOFError, zlib.error) as exc:
raise ScoValueError(
f"Erreur lecture archive ({fname} invalide)"
) from exc
else:
with open(fname, "rb") as f:
data = f.read()
2023-09-18 22:45:45 +02:00
except FileNotFoundError as exc:
raise ScoValueError(
f"Erreur lecture archive (inexistant, chemin {fname})"
) from exc
return data
2020-09-26 16:19:37 +02:00
def get_archived_file(self, oid, archive_name, filename, dept_id: int = None):
2022-12-27 00:13:34 +01:00
"""Recupère les donnees du fichier indiqué et envoie au client.
Returns: Response
"""
archive_id = self.get_id_from_name(oid, archive_name, dept_id=dept_id)
2020-09-26 16:19:37 +02:00
data = self.get(archive_id, filename)
2024-04-15 03:21:32 +02:00
if filename.endswith(".gz"):
filename = filename[:-3]
mime = mimetypes.guess_type(filename)[0]
if mime is None:
mime = "application/octet-stream"
return scu.send_file(data, filename, mime=mime)