#!/opt/scodoc/venv/bin/python # -*- coding: utf-8 -*- # -*- mode: python -*- ############################################################################## # # Gestion scolarite IUT # # Copyright (c) 1999 - 2023 Emmanuel Viennet. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Emmanuel Viennet emmanuel.viennet@viennet.net # ############################################################################## """Anonymize une base de données ScoDoc Runned as user "scodoc" with scodoc and postgresql up. E. Viennet, Jan 2019 """ import psycopg2 import sys import traceback def log(msg): sys.stdout.flush() sys.stderr.write(msg + "\n") sys.stderr.flush() def usage(): sys.stdout.flush() sys.stderr.flush() print(f"Usage: {sys.argv[0]} [--users] dbname", file=sys.stderr) sys.exit(1) # --- Fonctions d'Anonymisation, en SQL anonymize_name = "random_text_md5(8)" anonymize_date = "'1970-01-01'" anonymize_question_str = "'?'" anonymize_null = "NULL" # --- Champs à anonymiser (cette configuration pourrait être placé dans # un fichier séparé et le code serait alors générique pour toute base # posgresql. # # On essaie de retirer les données personnelles des étudiants et des entreprises # # ANONYMIZED_FIELDS = { "identite.nom": anonymize_name, "identite.prenom": anonymize_name, "identite.nom_usuel": anonymize_null, "identite.civilite": "'X'", "identite.date_naissance": anonymize_date, "identite.lieu_naissance": anonymize_question_str, "identite.dept_naissance": anonymize_question_str, "identite.nationalite": anonymize_question_str, "identite.statut": anonymize_null, "identite.boursier": anonymize_null, "identite.photo_filename": anonymize_null, "identite.code_nip": anonymize_null, "identite.code_ine": anonymize_null, "identite.scodoc7_id": anonymize_null, "adresse.email": "'ano@nyme.fr'", "adresse.emailperso": anonymize_null, "adresse.domicile": anonymize_null, "adresse.codepostaldomicile": anonymize_null, "adresse.villedomicile": anonymize_null, "adresse.paysdomicile": anonymize_null, "adresse.telephone": anonymize_null, "adresse.telephonemobile": anonymize_null, "adresse.fax": anonymize_null, "admissions.nomlycee": anonymize_name, "billet_absence.description": anonymize_null, "etud_annotations.comment": anonymize_name, "notes_appreciations.comment": anonymize_name, } def anonymize_column(cursor, tablecolumn): """Anonymise une colonne tablecolumn est de la forme nom_de_table.nom_de_colonne, par exemple "identite.nom" key_name est le nom de la colonne (clé) à utiliser pour certains remplacements (cette clé doit être anonyme et unique). Par exemple, un nom propre pourrait être remplacé par nom_valeur_de_la_clé. """ table, column = tablecolumn.split(".") anonymized = ANONYMIZED_FIELDS[tablecolumn] log(f"processing {tablecolumn}") cursor.execute(f"UPDATE {table} SET {column} = {anonymized};") def anonymize_users(cursor): """Anonymise la table utilisateurs""" log("processing user table") cursor.execute("""UPDATE "user" SET email = 'x@y.fr';""") cursor.execute("""UPDATE "user" SET password_hash = '*';""") cursor.execute("""UPDATE "user" SET password_scodoc7 = NULL;""") cursor.execute("""UPDATE "user" SET date_created = '2001-01-01';""") cursor.execute("""UPDATE "user" SET date_expiration = '2201-12-31';""") cursor.execute("""UPDATE "user" SET token = NULL;""") cursor.execute("""UPDATE "user" SET token_expiration = NULL;""") cursor.execute("""UPDATE "user" SET nom=CONCAT('nom_', id);""") cursor.execute("""UPDATE "user" SET prenom=CONCAT('nom_', id);""") def anonymize_db(cursor): """Traite, une à une, les colonnes indiquées dans ANONYMIZED_FIELDS""" for tablecolumn in ANONYMIZED_FIELDS: anonymize_column(cursor, tablecolumn) process_users = False if len(sys.argv) < 2 or len(sys.argv) > 3: usage() if len(sys.argv) > 2: if sys.argv[1] != "--users": usage() dbname = sys.argv[2] process_users = True else: dbname = sys.argv[1] log(f"\nAnonymizing database {dbname}") cnx_string = "dbname=" + dbname try: cnx = psycopg2.connect(cnx_string) except Exception as e: log(f"\n*** Error: can't connect to database {dbname} ***\n") log(f"""connexion string was "{cnx_string}" """) traceback.print_exc() cnx.set_session(autocommit=False) cursor = cnx.cursor() anonymize_db(cursor) if process_users: anonymize_users(cursor) cnx.commit() cnx.close()