# Skip to content
#
# Initial release
#
# Initial release #4

# Mirrors the dataset files of this repository into a Hugging Face Hub dataset
# repository each time a GitHub release is created.
name: Publish Datasets to Hugging Face Hub

on:
  release:
    types: [created]

# This workflow only reads the GitHub repository; writes go to the Hub and are
# authorised by HF_TOKEN, so the GITHUB_TOKEN is kept read-only.
permissions:
  contents: read

jobs:
  publish-to-hf-hub:
    name: Publish datasets to Hugging Face Hub
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Fetch all history for all tags and branches
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install Hugging Face Hub CLI
        run: python -m pip install --upgrade pip huggingface_hub

      - name: Log in to Hugging Face Hub
        env:
          # Required: Hugging Face Hub token with write access
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: huggingface-cli login --token "$HF_TOKEN"

      - name: Define Dataset Repository Name
        id: dataset_info
        run: echo "repo_id=vincentkoc/tiny_qa_benchmark_pp" >> "$GITHUB_OUTPUT"

      - name: Push to Hub
        env:
          HF_DATASET_REPO_ID: ${{ steps.dataset_info.outputs.repo_id }}
          RELEASE_VERSION: ${{ github.event.release.tag_name }}
          # Needed again here: env values are scoped to a single step, and the
          # git operations below authenticate with this token.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          echo "Uploading files for release: $RELEASE_VERSION to $HF_DATASET_REPO_ID"

          # Create a temporary directory for the dataset repo
          temp_dataset_repo="temp_hf_dataset_repo"

          # Git over HTTPS needs its own credentials, so the token is embedded
          # in the remote URL. The user part is the owner segment of the repo id.
          # NOTE(review): assumes the repo owner is a valid Hub username - confirm.
          hf_user="${HF_DATASET_REPO_ID%%/*}"
          remote_url="https://${hf_user}:${HF_TOKEN}@huggingface.co/datasets/${HF_DATASET_REPO_ID}"
          git clone "$remote_url" "$temp_dataset_repo"
          cd "$temp_dataset_repo"

          # Ensure the target directories exist in the cloned repo
          mkdir -p data/core_en
          mkdir -p data/packs
          mkdir -p metadata

          # Copy files from the main repository to the dataset repository structure
          cp -r ../data/core_en/* ./data/core_en/
          cp -r ../data/packs/* ./data/packs/

          # # Copy metadata directory contents if it exists and is not empty
          # if [ -d ../metadata ] && [ "$(ls -A ../metadata)" ]; then
          #   echo "Copying metadata directory contents..."
          #   cp -r ../metadata/. ./metadata/
          # else
          #   echo "Warning: Source metadata directory ../metadata is missing or empty. Skipping metadata copy."
          # fi

          # Copy the dedicated Hugging Face README as the main README.md for the dataset card
          if [ -f ../README_hg.md ]; then
            cp ../README_hg.md ./README.md
          else
            echo "Warning: README_hg.md not found. Falling back to main README.md for Hugging Face dataset card."
            cp ../README.md ./README.md
          fi
          cp ../LICENCE.data_packs.md ./LICENCE.data_packs.md
          # Add any other specific files you want in the dataset repo root

          # Add all changes
          git add .

          # Commit and push only when the staged tree differs from HEAD
          if ! git diff --staged --quiet; then
            git config user.name "GitHub Actions Bot"
            # NOTE(review): committer address set to the github-actions[bot]
            # noreply identity; confirm this is the intended commit author.
            git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
            git commit -m "Update dataset from GitHub release $RELEASE_VERSION"
            git push
            echo "Successfully pushed changes to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          else
            echo "No changes to push to Hugging Face Hub dataset $HF_DATASET_REPO_ID"
          fi
          cd ..