normalisation samples

This commit is contained in:
Jean-Marie PLACE 2022-08-20 18:19:35 +02:00
parent a761a628bd
commit e5a01620f7
1 changed files with 12 additions and 2 deletions

View File

@ -43,6 +43,7 @@ TODO: ajouter un argument au script permettant de ne générer qu'un seul fichie
import os
import shutil
import sys
import re
from collections import defaultdict
from pprint import pprint as pp
from pprint import pformat as pf
@ -120,7 +121,14 @@ class Sample:
file.write(f"> `{self.content}`\n\n")
file.write("```json\n")
file.write(json.dumps(self.result, indent=4))
content = json.dumps(self.result, indent=4, sort_keys=True)
content = content.replace("... etc.", "...")
# regexp for date like: "2022-08-14T10:01:44.043869+02:00"
regexp = re.compile(
r'"(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?"'
)
content = regexp.sub('"2022-08-20T12:00:00.000000+02:00"', content)
file.write(content)
file.write("\n```\n\n")
@ -154,7 +162,9 @@ class Samples:
for entry, samples in self.entries.items():
file = open(f"{DATA_DIR}sample_{entry}.json.md", "tw")
file.write(f"### {entry}\n\n")
for sample in samples:
for sample in sorted(
samples, key=lambda s: s.url
): # sorted de façon à rendre le fichier résultat déterministe (i.e. indépendant de l'ordre d'arrivée des résultats)
sample.dump(file)
file.close()