From 75d4c110a8d00faaef60a7511cde603babe5e033 Mon Sep 17 00:00:00 2001 From: Emmanuel Viennet Date: Fri, 1 Mar 2024 11:12:36 +0100 Subject: [PATCH] =?UTF-8?q?Am=C3=A9liore=20anonymisation=20(users)=20+=20l?= =?UTF-8?q?ien=20contact=20+=20cosmetic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/but/cursus_but.py | 3 +- app/scodoc/sco_dept.py | 2 + sco_version.py | 2 +- tools/anonymize_db.py | 139 ++++++++++++++++++++++++++++++++--------- 4 files changed, 114 insertions(+), 32 deletions(-) diff --git a/app/but/cursus_but.py b/app/but/cursus_but.py index d55d1e88..efd37521 100644 --- a/app/but/cursus_but.py +++ b/app/but/cursus_but.py @@ -563,7 +563,8 @@ def formation_semestre_niveaux_warning(formation: Formation, semestre_idx: int) if nb_niveaux_tc != nb_ues_tc: H.append( f"""
  • {nb_niveaux_tc} niveaux de compétences de tronc commun, - mais {nb_ues_tc} UEs de tronc commun !
  • """ + mais {nb_ues_tc} UEs de tronc commun ! (c'est normal si + vous avez des UEs différenciées par parcours)""" ) if H: diff --git a/app/scodoc/sco_dept.py b/app/scodoc/sco_dept.py index 02eba72f..a2edbd94 100644 --- a/app/scodoc/sco_dept.py +++ b/app/scodoc/sco_dept.py @@ -208,6 +208,8 @@ def index_html(showcodes=0, showsemtable=0): """

    Assistance

    """ diff --git a/sco_version.py b/sco_version.py index 41ebc303..960e79a5 100644 --- a/sco_version.py +++ b/sco_version.py @@ -1,7 +1,7 @@ # -*- mode: python -*- # -*- coding: utf-8 -*- -SCOVERSION = "9.6.949" +SCOVERSION = "9.6.950" SCONAME = "ScoDoc" diff --git a/tools/anonymize_db.py b/tools/anonymize_db.py index e537d43f..b2583fae 100755 --- a/tools/anonymize_db.py +++ b/tools/anonymize_db.py @@ -30,12 +30,15 @@ Runned as user "scodoc" with scodoc and postgresql up. -E. Viennet, Jan 2019 -""" +Travaille entièrement au niveau SQL, n'utilise aucun modèle SQLAlchemy. -import psycopg2 +E. Viennet, Jan 2019, Fev 2024 +""" +import random import sys import traceback +import psycopg2 +from psycopg2 import extras def log(msg): @@ -59,9 +62,21 @@ anonymize_false = "FALSE" anonymize_question_str = "'?'" anonymize_null = "NULL" +# --- Listes de noms et prénoms pour remplacer les identités +NOMS = [ + x.strip() + for x in open("/opt/scodoc/tools/fakeportal/nomsprenoms/noms.txt", encoding="utf8") +] +PRENOMS = [ + x.strip() + for x in open( + "/opt/scodoc/tools/fakeportal/nomsprenoms/prenoms.txt", encoding="utf8" + ) +] + # --- Champs à anonymiser (cette configuration pourrait être placé dans # un fichier séparé et le code serait alors générique pour toute base -# posgresql. +# postgresql. 
# # On essaie de retirer les données personnelles des étudiants et des entreprises # @@ -111,6 +126,26 @@ def anonymize_column(cursor, tablecolumn): cursor.execute(f"UPDATE {table} SET {column} = {anonymized};") +def rename_students(cursor): + """Remet des noms/prenoms fictifs aux étudiants""" + # Change les noms/prenoms + cursor.execute("""SELECT * FROM "identite";""") + etuds = cursor.fetchall() + for etud in etuds: + nom, prenom = random.choice(NOMS), random.choice(PRENOMS) + cursor.execute( + """UPDATE "identite" + SET nom=%(nom)s, prenom=%(prenom)s + WHERE id=%(id)s + """, + { + "id": etud["id"], + "nom": nom, + "prenom": prenom, + }, + ) + + def anonymize_users(cursor): """Anonymise la table utilisateurs""" log("processing user table") @@ -121,8 +156,51 @@ def anonymize_users(cursor): cursor.execute("""UPDATE "user" SET date_expiration = '2201-12-31';""") cursor.execute("""UPDATE "user" SET token = NULL;""") cursor.execute("""UPDATE "user" SET token_expiration = NULL;""") - cursor.execute("""UPDATE "user" SET nom=CONCAT('nom_', id);""") - cursor.execute("""UPDATE "user" SET prenom=CONCAT('nom_', id);""") + # Change les noms/prenoms/mail + cursor.execute("""SELECT * FROM "user";""") + users = cursor.fetchall() # fetch tout car modifie cette table ds la boucle + used_user_names = {u["user_name"] for u in users} + for user in users: + user_name = user["user_name"] + nom, prenom = random.choice(NOMS), random.choice(PRENOMS) + new_name = (prenom[0] + nom).lower() + # unique ? 
+ while new_name in used_user_names: + new_name += "x" + used_user_names.add(new_name) + print(f"{user_name} > {new_name}") + cursor.execute( + """UPDATE "user" + SET nom=%(nom)s, prenom=%(prenom)s, email=%(email)s, user_name=%(new_name)s + WHERE id=%(id)s + """, + { + "email": f"{prenom}.{nom}@ano.nyme", + "id": user["id"], + "nom": nom, + "prenom": prenom, + "new_name": new_name, + }, + ) + # Change les username: utilisés en référence externe + # dans diverses tables: + for table, field in ( + ("etud_annotations", "author"), + ("scolog", "authenticated_user"), + ("scolar_news", "authenticated_user"), + ("notes_appreciations", "author"), + ("are_historique", "authenticated_user"), + ): + cursor.execute( + f"""UPDATE "{table}" + SET {field}=%(new_name)s + WHERE {field}=%(user_name)s + """, + { + "new_name": new_name, + "user_name": user_name, + }, + ) def anonymize_db(cursor): @@ -131,32 +209,33 @@ def anonymize_db(cursor): anonymize_column(cursor, tablecolumn) -process_users = False -if len(sys.argv) < 2 or len(sys.argv) > 3: - usage() -if len(sys.argv) > 2: - if sys.argv[1] != "--users": +if __name__ == "__main__": + PROCESS_USERS = False + if len(sys.argv) < 2 or len(sys.argv) > 3: usage() - dbname = sys.argv[2] - process_users = True -else: - dbname = sys.argv[1] + if len(sys.argv) > 2: + if sys.argv[1] != "--users": + usage() + dbname = sys.argv[2] + PROCESS_USERS = True + else: + dbname = sys.argv[1] -log(f"\nAnonymizing database {dbname}") -cnx_string = "dbname=" + dbname -try: - cnx = psycopg2.connect(cnx_string) -except Exception as e: - log(f"\n*** Error: can't connect to database {dbname} ***\n") - log(f"""connexion string was "{cnx_string}" """) - traceback.print_exc() + log(f"\nAnonymizing database {dbname}") + cnx_string = "dbname=" + dbname + try: + cnx = psycopg2.connect(cnx_string) + except Exception as e: + log(f"\n*** Error: can't connect to database {dbname} ***\n") + log(f"""connexion string was "{cnx_string}" """) + traceback.print_exc() 
-cnx.set_session(autocommit=False) -cursor = cnx.cursor() + cnx.set_session(autocommit=False) + cursor = cnx.cursor(cursor_factory=psycopg2.extras.DictCursor) -anonymize_db(cursor) -if process_users: - anonymize_users(cursor) + anonymize_db(cursor) + if PROCESS_USERS: + anonymize_users(cursor) -cnx.commit() -cnx.close() + cnx.commit() + cnx.close()