ScoDoc/app/comp/inscr_mod.py

73 lines
2.9 KiB
Python
Raw Normal View History

# -*- mode: python -*-
# -*- coding: utf-8 -*-
"""Matrices d'inscription aux modules d'un semestre
"""
import numpy as np
import pandas as pd
from app import db
from app import models
#
# Le chargement des inscriptions est long: matrice nb_module x nb_etuds
# sur test debug 116 etuds, 18 modules, on est autour de 250ms.
# On a testé trois approches, ci-dessous (et retenu la 1ere)
#
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr(formsemestre) -> pd.DataFrame:
"""Charge la matrice des inscriptions aux modules du semestre
2022-01-16 23:47:52 +01:00
rows: etudid (inscrits au semestre, avec DEM et DEF)
columns: moduleimpl_id (en chaîne)
value: bool (0/1 inscrit ou pas)
"""
# méthode la moins lente: une requete par module, merge les dataframes
2022-01-25 10:45:13 +01:00
moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted]
2022-01-16 23:47:52 +01:00
etudids = [inscr.etudid for inscr in formsemestre.inscriptions]
df = pd.DataFrame(index=etudids, dtype=int)
for moduleimpl_id in moduleimpl_ids:
ins_df = pd.read_sql_query(
"""SELECT etudid, 1 AS "%(moduleimpl_id)s"
FROM notes_moduleimpl_inscription
WHERE moduleimpl_id=%(moduleimpl_id)s""",
db.engine,
params={"moduleimpl_id": moduleimpl_id},
index_col="etudid",
dtype=int,
)
df = df.merge(ins_df, how="left", left_index=True, right_index=True)
# Force columns names to integers (moduleimpl ids)
df.columns = pd.Int64Index([int(x) for x in df.columns], dtype="int")
# les colonnes de df sont en float (Nan) quand il n'y a
# aucun inscrit au module.
df.fillna(0, inplace=True) # les non-inscrits
return df.astype(bool) # x100 25.5s 15s 17s
# chrono avec timeit:
# timeit.timeit('x = df_load_module_inscr_v0(696)', number=100, globals=globals())
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr_v0(formsemestre):
# methode 0, pur SQL Alchemy, 1.5 à 2 fois plus lente
2022-01-25 10:45:13 +01:00
moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted]
2021-12-05 20:21:51 +01:00
etudids = [i.etudid for i in formsemestre.inscriptions]
df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
2022-01-25 10:45:13 +01:00
for modimpl in formsemestre.modimpls_sorted:
ins_mod = df[modimpl.id]
for inscr in modimpl.inscriptions:
ins_mod[inscr.etudid] = True
return df # x100 30.7s 46s 32s
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr_v2(formsemestre):
2022-01-25 10:45:13 +01:00
moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted]
2021-12-05 20:21:51 +01:00
etudids = [i.etudid for i in formsemestre.inscriptions]
df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
cursor = db.engine.execute(
"select moduleimpl_id, etudid from notes_moduleimpl_inscription i, notes_moduleimpl m where i.moduleimpl_id = m.id and m.formsemestre_id = %(formsemestre_id)s",
2021-12-05 20:21:51 +01:00
{"formsemestre_id": formsemestre.id},
)
for moduleimpl_id, etudid in cursor:
df[moduleimpl_id][etudid] = True
return df # x100 44s, 31s, 29s, 28s