Initial release #4
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Publishes this repository's dataset files to a Hugging Face Hub dataset
# repository each time a GitHub release is created.
#
# Required secret:
#   HF_TOKEN - Hugging Face Hub access token with write access to the dataset.
name: Publish Datasets to Hugging Face Hub

on:
  release:
    types: [created]

# Least privilege: the job only reads this repository. Writing to the Hub is
# authorised by HF_TOKEN, not by the GitHub token.
permissions:
  contents: read

jobs:
  publish-to-hf-hub:
    name: Publish datasets to Hugging Face Hub
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install Hugging Face Hub CLI
        run: python -m pip install --upgrade pip huggingface_hub

      - name: Log in to Hugging Face Hub
        env:
          # Required: Hugging Face Hub token with write access
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          set -euo pipefail
          # Fail early with a readable message instead of a confusing auth error later.
          if [ -z "${HF_TOKEN:-}" ]; then
            echo "::error::HF_TOKEN secret is not set; cannot authenticate with Hugging Face Hub."
            exit 1
          fi
          # A credential helper must exist before login so the token can be handed to git;
          # the later `git push` over HTTPS relies on it for authentication.
          git config --global credential.helper store
          huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential

      - name: Define Dataset Repository Name
        id: dataset_info
        run: echo "repo_id=vincentkoc/tiny_qa_benchmark_pp" >> "$GITHUB_OUTPUT"

      - name: Push to Hub
        env:
          HF_DATASET_REPO_ID: ${{ steps.dataset_info.outputs.repo_id }}
          # Passed through env (not inlined in the script) so the tag name is never
          # interpreted as shell code.
          RELEASE_VERSION: ${{ github.event.release.tag_name }}
        run: |
          set -euo pipefail
          echo "Uploading files for release: $RELEASE_VERSION to $HF_DATASET_REPO_ID"

          # Create a temporary directory for the dataset repo
          temp_dataset_repo="temp_hf_dataset_repo"
          git clone "https://huggingface.co/datasets/$HF_DATASET_REPO_ID" "$temp_dataset_repo"
          cd "$temp_dataset_repo"

          # Ensure the target directories exist in the cloned repo
          mkdir -p data/core_en
          mkdir -p data/packs
          mkdir -p metadata

          # Copy files from the main repository to the dataset repository structure
          cp -r ../data/core_en/* ./data/core_en/
          cp -r ../data/packs/* ./data/packs/

          # # Copy metadata directory contents if it exists and is not empty
          # if [ -d ../metadata ] && [ "$(ls -A ../metadata)" ]; then
          #   echo "Copying metadata directory contents..."
          #   cp -r ../metadata/. ./metadata/
          # else
          #   echo "Warning: Source metadata directory ../metadata is missing or empty. Skipping metadata copy."
          # fi

          # Copy the dedicated Hugging Face README as the main README.md for the dataset card
          if [ -f ../README_hg.md ]; then
            cp ../README_hg.md ./README.md
          else
            echo "Warning: README_hg.md not found. Falling back to main README.md for Hugging Face dataset card."
            cp ../README.md ./README.md
          fi
          cp ../LICENCE.data_packs.md ./LICENCE.data_packs.md
          # Add any other specific files you want in the dataset repo root

          # Add all changes
          git add .

          # Commit and push only when the staged tree differs from the Hub's current state,
          # so re-running the workflow for an unchanged release is a no-op.
          if ! git diff --staged --quiet; then
            git config user.name "GitHub Actions Bot"
            git config user.email "actions@github.com"
            git commit -m "Update dataset from GitHub release $RELEASE_VERSION"
            git push
            echo "Successfully pushed changes to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          else
            echo "No changes to push to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          fi
          cd ..