Commit
·
5a9b2ce
1
Parent(s):
c6a30f0
Delete script2.py
Browse files- script2.py +0 -47
script2.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import json
|
| 3 |
-
import random
|
| 4 |
-
from glob import glob
|
| 5 |
-
from huggingface_hub import Repository
|
| 6 |
-
|
| 7 |
-
# Name of the combined file; it is opened as a relative path, so it is
# written into the current working directory.
output_file_name = "combined_conversations.jsonl"
# Hugging Face Hub repository ID. The same string is also passed as the first
# argument to Repository(...) and used as the copy destination below.
repo_id = "AlignmentLab-AI/idonteven"
|
| 11 |
-
|
| 12 |
-
# Shuffle and combine jsonl files
|
| 13 |
-
def shuffle_and_combine_jsonls(output_file_name):
    """Merge every ``*.jsonl`` file in the current directory into one shuffled file.

    Args:
        output_file_name: Path of the combined file to (over)write.

    Returns:
        ``output_file_name``, unchanged, so the caller can chain on it.

    Notes:
        * A pre-existing ``output_file_name`` is never used as an input, so
          running the script twice does not duplicate every record.
        * Every record is written with exactly one trailing newline; a source
          file whose last line lacks one would otherwise be glued onto the
          next record after shuffling.
        * Whitespace-only lines are dropped because they are not records.
    """
    output_abspath = os.path.abspath(output_file_name)
    records = []
    # Sorted so the pre-shuffle order is stable for a seeded ``random``.
    for jsonl_file in sorted(glob("*.jsonl")):
        if os.path.abspath(jsonl_file) == output_abspath:
            # Output of a previous run; reading it back would duplicate data.
            continue
        with open(jsonl_file, "r", encoding="utf-8") as file:
            for line in file:
                if not line.strip():
                    continue
                records.append(line if line.endswith("\n") else line + "\n")

    random.shuffle(records)

    with open(output_file_name, "w", encoding="utf-8") as outfile:
        outfile.writelines(records)
    return output_file_name
|
| 22 |
-
|
| 23 |
-
# Clone your repository from Hugging Face and return the local path
|
| 24 |
-
def clone_repository(repo_id):
    """Build and return a ``Repository`` for ``repo_id``.

    ``repo_id`` is passed twice: as the first positional argument and as
    ``clone_from``.
    NOTE(review): the first positional argument is presumably the local
    checkout directory -- confirm against the huggingface_hub documentation.
    """
    return Repository(repo_id, clone_from=repo_id)
|
| 27 |
-
|
| 28 |
-
# Copy the combined jsonl file and scripts to the cloned repository
|
| 29 |
-
def copy_files_to_repo(combined_jsonl_path, dest_dir=None):
    """Copy the combined file and the other top-level files into the clone.

    Args:
        combined_jsonl_path: Path of the combined JSONL file to copy.
        dest_dir: Directory to copy into. Defaults to the module-level
            ``repo_id``, which is where the original script copied to.

    Raises:
        NotADirectoryError: If the destination directory does not exist.
        OSError: If a file cannot be copied (previously ignored silently).
    """
    # Local import: the file's import block does not provide shutil.
    import shutil

    target_dir = repo_id if dest_dir is None else dest_dir
    if not os.path.isdir(target_dir):
        raise NotADirectoryError(
            f"Destination directory does not exist: {target_dir}"
        )

    # shutil instead of a shell "cp" string: names containing spaces or shell
    # metacharacters are handled safely and failures raise.
    shutil.copy(combined_jsonl_path, target_dir)

    # Copy other necessary files, e.g., scripts.
    for entry in sorted(glob("*")):
        # Only regular files are copied. The old "cp" (without -r) never
        # copied directories either, and this also skips the directory that
        # holds the clone, which the old `file != repo_id` test could not
        # match for an "org/name" style id.
        if not os.path.isfile(entry):
            continue
        if os.path.samefile(entry, combined_jsonl_path):
            # Already copied above.
            continue
        shutil.copy(entry, target_dir)
|
| 36 |
-
|
| 37 |
-
# Commit and push changes to the Hugging Face repository
|
| 38 |
-
def push_to_hub(repo):
    """Stage, commit and push through ``repo``.

    Calls ``repo.git_add()``, ``repo.git_commit(...)`` and ``repo.git_push()``
    in that order; the commit message is always "Update dataset".
    """
    commit_message = "Update dataset"
    repo.git_add()
    repo.git_commit(commit_message)
    repo.git_push()
|
| 42 |
-
|
| 43 |
-
# Run all steps
|
| 44 |
-
# 1. Build the shuffled, combined JSONL file in the current directory.
combined_jsonl_path = shuffle_and_combine_jsonls(output_file_name)
# 2. Create the Repository object for repo_id. This must happen before the
#    copy step, which copies into the repo_id path.
repo = clone_repository(repo_id)
# 3. Copy the combined file and the other top-level files to repo_id.
copy_files_to_repo(combined_jsonl_path)
# 4. Stage, commit ("Update dataset") and push.
push_to_hub(repo)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|