# -*- mode: python -*- # -*- coding: utf-8 -*- """Matrices d'inscription aux modules d'un semestre """ import pandas as pd from app import db # # Le chargement des inscriptions est long: matrice nb_module x nb_etuds # sur test debug 116 etuds, 18 modules, on est autour de 250ms. # On a testé trois approches, ci-dessous (et retenu la 1ere) # def df_load_modimpl_inscr(formsemestre) -> pd.DataFrame: """Charge la matrice des inscriptions aux modules du semestre rows: etudid (inscrits au semestre, avec DEM et DEF) columns: moduleimpl_id value: bool (0/1 inscrit ou pas) """ # méthode la moins lente: une requete par module, merge les dataframes moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted] etudids = [inscr.etudid for inscr in formsemestre.inscriptions] df = pd.DataFrame(index=etudids, dtype=int) for moduleimpl_id in moduleimpl_ids: ins_df = pd.read_sql_query( """SELECT etudid, 1 AS "%(moduleimpl_id)s" FROM notes_moduleimpl_inscription WHERE moduleimpl_id=%(moduleimpl_id)s""", db.engine, params={"moduleimpl_id": moduleimpl_id}, index_col="etudid", dtype=int, ) df = df.merge(ins_df, how="left", left_index=True, right_index=True) # Force columns names to integers (moduleimpl ids) df.columns = pd.Index([int(x) for x in df.columns], dtype=int) # les colonnes de df sont en float (Nan) quand il n'y a # aucun inscrit au module. df.fillna(0, inplace=True) # les non-inscrits return df.astype(bool) # x100 25.5s 15s 17s # chrono avec timeit: # timeit.timeit('x = df_load_module_inscr_v0(696)', number=100, globals=globals()) def df_load_modimpl_inscr_v0(formsemestre): # methode 0, pur SQL Alchemy, 1.5 à 2 fois plus lente moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted] etudids = [i.etudid for i in formsemestre.inscriptions] df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool) for modimpl in formsemestre.modimpls_sorted: ins_mod = df[modimpl.id] for inscr in modimpl.inscriptions: ins_mod[inscr.etudid] = True return df # x100 30.7s 46s 32s def df_load_modimpl_inscr_v2(formsemestre): moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted] etudids = [i.etudid for i in formsemestre.inscriptions] df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool) cursor = db.engine.execute( "select moduleimpl_id, etudid from notes_moduleimpl_inscription i, notes_moduleimpl m where i.moduleimpl_id = m.id and m.formsemestre_id = %(formsemestre_id)s", {"formsemestre_id": formsemestre.id}, ) for moduleimpl_id, etudid in cursor: df[moduleimpl_id][etudid] = True return df # x100 44s, 31s, 29s, 28s