forked from ScoDoc/ScoDoc
normalisation samples
This commit is contained in:
parent
a761a628bd
commit
e5a01620f7
|
@ -43,6 +43,7 @@ TODO: ajouter un argument au script permettant de ne générer qu'un seul fichie
|
|||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pprint import pprint as pp
|
||||
from pprint import pformat as pf
|
||||
|
@ -120,7 +121,14 @@ class Sample:
|
|||
file.write(f"> `{self.content}`\n\n")
|
||||
|
||||
file.write("```json\n")
|
||||
file.write(json.dumps(self.result, indent=4))
|
||||
content = json.dumps(self.result, indent=4, sort_keys=True)
|
||||
content = content.replace("... etc.", "...")
|
||||
# regexp for date like: "2022-08-14T10:01:44.043869+02:00"
|
||||
regexp = re.compile(
|
||||
r'"(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?"'
|
||||
)
|
||||
content = regexp.sub('"2022-08-20T12:00:00.000000+02:00"', content)
|
||||
file.write(content)
|
||||
file.write("\n```\n\n")
|
||||
|
||||
|
||||
|
@ -154,7 +162,9 @@ class Samples:
|
|||
for entry, samples in self.entries.items():
|
||||
file = open(f"{DATA_DIR}sample_{entry}.json.md", "tw")
|
||||
file.write(f"### {entry}\n\n")
|
||||
for sample in samples:
|
||||
for sample in sorted(
|
||||
samples, key=lambda s: s.url
|
||||
): # sorted de façon à rendre le fichier résultat déterministe (i.e. indépendant de l'ordre d'arrivée des résultats)
|
||||
sample.dump(file)
|
||||
file.close()
|
||||
|
||||
|
|
Loading…
Reference in New Issue