From 239abafc8d642b529259e204d7e05d47c94636ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Sun, 25 Jan 2026 15:16:38 +0100 Subject: [PATCH 1/4] notebook sur parcours sup --- .../articles/2026/2026-03-15-route2026-ml.rst | 6 + _doc/practice/years/2023/index.rst | 1 + _doc/practice/years/2025/index.rst | 1 + _doc/practice/years/2026/index.rst | 10 + .../practice/years/2026/parcoursup_2026.ipynb | 472809 +++++++++++++++ _doc/practice/years/index.rst | 1 + .../ut_xrun_doc/test_normalize_notebook.py | 1 + 7 files changed, 472829 insertions(+) create mode 100644 _doc/practice/years/2026/index.rst create mode 100644 _doc/practice/years/2026/parcoursup_2026.ipynb diff --git a/_doc/articles/2026/2026-03-15-route2026-ml.rst b/_doc/articles/2026/2026-03-15-route2026-ml.rst index 92226d8..a845d39 100644 --- a/_doc/articles/2026/2026-03-15-route2026-ml.rst +++ b/_doc/articles/2026/2026-03-15-route2026-ml.rst @@ -38,6 +38,12 @@ Séance 1 (6/2) * utilisation de ChatGPT, Gemini (et autres...), retour d'expérience * et ensuite... +*problème* + +Peut-on prédire le nombre de condidatures en 2026 pour chaque établissement ? + +:ref:`Données parcours-sup 2021-2025 ` + Séance 2 (13/2) =============== diff --git a/_doc/practice/years/2023/index.rst b/_doc/practice/years/2023/index.rst index 7ad1158..cc3db07 100644 --- a/_doc/practice/years/2023/index.rst +++ b/_doc/practice/years/2023/index.rst @@ -3,6 +3,7 @@ .. toctree:: :maxdepth: 1 + :caption: python editdist pivot_gauss diff --git a/_doc/practice/years/2025/index.rst b/_doc/practice/years/2025/index.rst index 6381ae4..c21fb4f 100644 --- a/_doc/practice/years/2025/index.rst +++ b/_doc/practice/years/2025/index.rst @@ -5,6 +5,7 @@ .. toctree:: :maxdepth: 1 + :caption: python seance1_point2d seance4_algo diff --git a/_doc/practice/years/2026/index.rst b/_doc/practice/years/2026/index.rst new file mode 100644 index 0000000..2274725 --- /dev/null +++ b/_doc/practice/years/2026/index.rst @@ -0,0 +1,10 @@ +.. _l-notebook-2026: + +2026 : notebooks créés en séances +================================= + +.. toctree:: + :maxdepth: 1 + :caption: machine learning + + parcoursup_2026 diff --git a/_doc/practice/years/2026/parcoursup_2026.ipynb b/_doc/practice/years/2026/parcoursup_2026.ipynb new file mode 100644 index 0000000..0b96af6 --- /dev/null +++ b/_doc/practice/years/2026/parcoursup_2026.ipynb @@ -0,0 +1,472809 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Données parcours-sup 2021-2025\n", + "\n", + "Voir [Parcoursup 2025 - vœux de poursuite d'études et de réorientation dans l'enseignement supérieur et réponses des établissements](https://data.enseignementsup-recherche.gouv.fr/explore/dataset/fr-esr-parcoursup/information/)." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas\n", + "from teachpyx.tools.pandas import read_csv_cached\n", + "from skrub import TableReport" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Récupération des données" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "urls = {\n", + " \"2021\": \"https://data.enseignementsup-recherche.gouv.fr/api/explore/v2.1/catalog/datasets/fr-esr-parcoursup_2021/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B\",\n", + " \"2022\": \"https://data.enseignementsup-recherche.gouv.fr/api/explore/v2.1/catalog/datasets/fr-esr-parcoursup_2022/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B\",\n", + " \"2023\": \"https://data.enseignementsup-recherche.gouv.fr/api/explore/v2.1/catalog/datasets/fr-esr-parcoursup_2023/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B\",\n", + " \"2024\": \"https://data.enseignementsup-recherche.gouv.fr/api/explore/v2.1/catalog/datasets/fr-esr-parcoursup_2024/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B\",\n", + " \"2025\": \"https://data.enseignementsup-recherche.gouv.fr/api/explore/v2.1/catalog/datasets/fr-esr-parcoursup/exports/csv?lang=fr&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B\",\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 118 / 118\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2025 = read_csv_cached(urls[\"2025\"], sep=\";\")\n", + "TableReport(df2025, max_plot_columns=120, max_association_columns=120)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 118 / 118\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2024 = read_csv_cached(urls[\"2024\"], sep=\";\")\n", + "TableReport(df2024, max_plot_columns=120, max_association_columns=120)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 118 / 118\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2023 = read_csv_cached(urls[\"2023\"], sep=\";\")\n", + "TableReport(df2023, max_plot_columns=120, max_association_columns=120)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loading '2021'\n", + "loading '2022'\n", + "loading '2023'\n", + "loading '2024'\n", + "loading '2025'\n" + ] + } + ], + "source": [ + "dfs = {}\n", + "for k, url in urls.items():\n", + " print(f\"loading {k!r}\")\n", + " dfs[k] = read_csv_cached(url, sep=\";\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fusion des années" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(69240, 123)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pandas.concat(dfs.values(), axis=0)\n", + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quelques explorations" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Effectif des admis en phase principale',\n", + " 'Effectif des admis en phase complémentaire',\n", + " 'Effectif des admis néo bacheliers',\n", + " 'Effectif des admis néo bacheliers généraux',\n", + " 'Effectif des admis néo bacheliers technologiques',\n", + " 'Effectif des admis néo bacheliers professionnels',\n", + " 'Effectif des admis néo bacheliers généraux ayant eu une mention au bac',\n", + " 'Effectif des admis néo bacheliers technologiques ayant eu une mention au bac',\n", + " 'Effectif des admis néo bacheliers professionnels ayant eu une mention au bac']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admis = [c for c in df.columns if \"Effectif des admis\" in c]\n", + "admis" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "eff = df[\n", + " [\n", + " \"Session\",\n", + " \"Code UAI de l'établissement\",\n", + " \"Établissement\",\n", + " \"Sélectivité\",\n", + " \"Filière de formation très agrégée\",\n", + " \"Capacité de l’établissement par formation\",\n", + " \"Effectif total des candidats pour une formation\",\n", + " \"Effectif total des candidats en phase principale\",\n", + " \"Effectif des autres candidats en phase principale\",\n", + " \"Effectif total des candidats en phase complémentaire\",\n", + " \"Effectifs des autres candidats en phase complémentaire\",\n", + " \"Effectif total des candidats classés par l’établissement en phase principale\",\n", + " \"Effectif des candidats classés par l’établissement en phase complémentaire\",\n", + " \"% d’admis ayant reçu leur proposition d’admission avant la fin de la procédure principale\",\n", + " \"% d’admis dont filles\",\n", + " \"Rang du dernier appelé du groupe 1\",\n", + " \"Rang du dernier appelé du groupe 2\",\n", + " \"Rang du dernier appelé du groupe 3\",\n", + " \"Concours communs et banque d'épreuves\",\n", + " \"Taux d’accès\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 20 / 20\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "TableReport(eff)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 5 / 5\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "capa = df[\n", + " [\n", + " \"Session\",\n", + " \"Code UAI de l'établissement\",\n", + " \"Établissement\",\n", + " \"Filière de formation très agrégée\",\n", + " \"Capacité de l’établissement par formation\",\n", + " ]\n", + "]\n", + "TableReport(capa)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 5 / 5\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "piv = capa.pivot_table(\n", + " index=[\n", + " \"Code UAI de l'établissement\",\n", + " \"Établissement\",\n", + " \"Filière de formation très agrégée\",\n", + " ],\n", + " columns=[\"Session\"],\n", + " values=[\"Capacité de l’établissement par formation\"],\n", + ")\n", + "TableReport(piv)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 5 / 5\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "candidats = df[\n", + " [\n", + " \"Session\",\n", + " \"Code UAI de l'établissement\",\n", + " \"Établissement\",\n", + " \"Filière de formation très agrégée\",\n", + " \"Effectif total des candidats pour une formation\",\n", + " ]\n", + "]\n", + "TableReport(candidats)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 1 / 5\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing column 5 / 5\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "

Please enable javascript

\n", + "

\n", + " The skrub table reports need javascript to display correctly. If you are\n", + " displaying a report in a Jupyter notebook and you see this message, you may need to\n", + " re-execute the cell or to trust the notebook (button on the top right or\n", + " \"File > Trust notebook\").\n", + "

\n", + "
\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "piv = candidats.pivot_table(\n", + " index=[\n", + " \"Code UAI de l'établissement\",\n", + " \"Établissement\",\n", + " \"Filière de formation très agrégée\",\n", + " ],\n", + " columns=[\"Session\"],\n", + " values=[\"Effectif total des candidats pour une formation\"],\n", + ")\n", + "TableReport(piv)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "this312", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/_doc/practice/years/index.rst b/_doc/practice/years/index.rst index 69e0eb2..1d58889 100644 --- a/_doc/practice/years/index.rst +++ b/_doc/practice/years/index.rst @@ -6,4 +6,5 @@ Notebooks écrits durant les séances 2023/index 2025/index + 2026/index diff --git a/_unittests/ut_xrun_doc/test_normalize_notebook.py b/_unittests/ut_xrun_doc/test_normalize_notebook.py index 45e7c66..f3bbbb2 100644 --- a/_unittests/ut_xrun_doc/test_normalize_notebook.py +++ b/_unittests/ut_xrun_doc/test_normalize_notebook.py @@ -77,6 +77,7 @@ def add_test_methods(cls): os.path.join(this, "..", "..", "_doc", "practice", "tds-base"), os.path.join(this, "..", "..", "_doc", "practice", "years", "2023"), os.path.join(this, "..", "..", "_doc", "practice", "years", "2025"), + os.path.join(this, "..", "..", "_doc", "practice", "years", "2026"), ] for fold in folds: cls.add_test_methods_path(os.path.normpath(fold)) From af9baf274dd0e8fe6bef1e505b255384ff313cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Sun, 25 Jan 2026 15:26:44 +0100 Subject: [PATCH 2/4] upgrade version --- CHANGELOGS.rst | 5 +++++ pyproject.toml | 2 +- teachpyx/__init__.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index 3dd9813..c248555 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -1,9 +1,14 @@ Change Logs =========== +0.6.0 ++++++ + 0.5.0 +++++ +* :pr:`87`: add dependency on tqdm, add read_csv_cached +* :pr:`81`: remove dependency on blockdiag * :pr:`66`: add dependency on patsy in requirements-dev.txt for new content 0.4.0 diff --git a/pyproject.toml b/pyproject.toml index 702f8a4..9875c08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ license = {file = "LICENSE.txt"} name = "teachpyx" readme = "README.rst" requires-python = ">=3.9" -version = "0.5.0" +version = "0.6.0" [project.urls] Homepage = "https://sdpython.github.io/doc/teachpyx/dev/" diff --git a/teachpyx/__init__.py b/teachpyx/__init__.py index ad683b1..a1abdd7 100644 --- a/teachpyx/__init__.py +++ b/teachpyx/__init__.py @@ -1,6 +1,6 @@ # coding: utf-8 -__version__ = "0.5.0" +__version__ = "0.6.0" __author__ = "Xavier Dupré" __github__ = "https://github.com/sdpython/teachpyx" __url__ = "https://sdpython.github.io/doc/teachpyx/dev/" From 0cbc366862d8e4a6a5a99a3945d0a8b4a91aca64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Sun, 25 Jan 2026 15:32:02 +0100 Subject: [PATCH 3/4] doc --- _doc/index.rst | 3 ++- teachpyx/tools/pandas.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/_doc/index.rst b/_doc/index.rst index d526d74..155d217 100644 --- a/_doc/index.rst +++ b/_doc/index.rst @@ -79,4 +79,5 @@ Le contenu est sur `github `_. Older versions ++++++++++++++ -* `0.3.1 <../v0.3.1/index.html>`_ +* `0.6.0 <../v0.6.0/index.html>`_ +* `0.5.0 <../v0.5.0/index.html>`_ diff --git a/teachpyx/tools/pandas.py b/teachpyx/tools/pandas.py index 165e544..ca5f08b 100644 --- a/teachpyx/tools/pandas.py +++ b/teachpyx/tools/pandas.py @@ -33,6 +33,8 @@ def read_csv_cached( :param ignore_cache: ignore the cache, overwrites it if it exists :param kwargs: other argument for :func:`pandas.read_csv` :return: dataframe + + .. versionadded:: 0.5.0 """ cache_dir = Path.home() / ".cache" / "teachpyx" / "pandas" cache_dir.mkdir(parents=True, exist_ok=True) From 1b107c1470ec5aabcbf6e4298e99d611532fc68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Sun, 25 Jan 2026 15:51:33 +0100 Subject: [PATCH 4/4] fix urls --- _doc/articles/2026/2026-03-15-route2026-ml.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_doc/articles/2026/2026-03-15-route2026-ml.rst b/_doc/articles/2026/2026-03-15-route2026-ml.rst index a845d39..12daee8 100644 --- a/_doc/articles/2026/2026-03-15-route2026-ml.rst +++ b/_doc/articles/2026/2026-03-15-route2026-ml.rst @@ -11,7 +11,7 @@ site web : `sdpython.github.io `_ Autres sites : -* Python pour la data science `_ (Lino Galiana - ENSAE) +* `Python pour la data science `_ (Lino Galiana - ENSAE) Fonctions utiles: