---
# Publishes this repository's dataset files to a Hugging Face Hub dataset
# repository each time a GitHub release is created.
#
# Required repository secret:
#   HF_TOKEN - Hugging Face Hub token with write access to the dataset repo.
name: Publish Datasets to Hugging Face Hub

on:
  release:
    types: [created]

# Nothing in this workflow writes back to the GitHub repository, so the
# GITHUB_TOKEN is restricted to read-only access (enough for the checkout).
permissions:
  contents: read

jobs:
  publish-to-hf-hub:
    name: Publish datasets to Hugging Face Hub
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install Hugging Face Hub CLI
        run: python -m pip install --upgrade pip huggingface_hub

      - name: Log in to Hugging Face Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}  # Required: Hugging Face Hub token with write access
        # Quoted so the token is always passed as a single argument.
        run: huggingface-cli login --token "$HF_TOKEN"

      - name: Define Dataset Repository Name
        id: dataset_info
        run: echo "repo_id=vincentkoc/tiny_qa_benchmark_pp" >> "$GITHUB_OUTPUT"

      - name: Push to Hub
        env:
          # The CLI login above does not configure credentials for plain git,
          # so the token is passed here and used in the clone URL below.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_DATASET_REPO_ID: ${{ steps.dataset_info.outputs.repo_id }}
          # Passed through env (not interpolated into the script) so a crafted
          # tag name cannot inject shell commands.
          RELEASE_VERSION: ${{ github.event.release.tag_name }}
        run: |
          # Abort on the first failing command, unset variable, or failed
          # pipeline stage instead of pushing a half-copied dataset.
          set -euo pipefail

          echo "Uploading files for release: $RELEASE_VERSION to $HF_DATASET_REPO_ID"

          # Create a temporary directory for the dataset repo
          temp_dataset_repo="temp_hf_dataset_repo"

          # Clone through a token-authenticated URL so that the later
          # `git push` can authenticate non-interactively. The username is
          # taken from the owner part of the repo id ("owner/name").
          hf_user="${HF_DATASET_REPO_ID%%/*}"
          git clone \
            "https://${hf_user}:${HF_TOKEN}@huggingface.co/datasets/${HF_DATASET_REPO_ID}" \
            "$temp_dataset_repo"
          cd "$temp_dataset_repo"

          # Ensure the target directories exist in the cloned repo
          mkdir -p data/core_en
          mkdir -p data/packs
          mkdir -p metadata

          # Copy files from the main repository to the dataset repository structure
          # Adjust paths if your data is in a different location in your source repo
          cp -r ../data/core_en/* ./data/core_en/
          cp -r ../data/packs/* ./data/packs/
          cp -r ../metadata/* ./metadata/
          # Also copy top-level READMEs and license files for the dataset for completeness
          cp ../README.md ./README.md
          cp ../LICENCE.data_packs.md ./LICENCE.data_packs.md
          # Add any other specific files you want in the dataset repo root

          # Add all changes
          git add .

          # Commit and push only when the staged tree differs from HEAD;
          # an empty commit would otherwise make `git commit` fail.
          if ! git diff --staged --quiet; then
            git config user.name "GitHub Actions Bot"
            git config user.email "github-actions[bot]@users.noreply.github.com"
            git commit -m "Update dataset from GitHub release $RELEASE_VERSION"
            git push
            echo "Successfully pushed changes to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          else
            echo "No changes to push to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          fi
          cd ..

          # The clone's .git/config contains the token-bearing remote URL;
          # remove it so the token does not linger in the workspace.
          rm -rf "$temp_dataset_repo"