pliny-the-prompter committed on
Commit
b46e97f
·
verified ·
1 Parent(s): 8d63ab3

Upload 130 files

Browse files
app.py CHANGED
@@ -278,6 +278,12 @@ METHODS = {
278
  "nuclear (maximum force combo)": "nuclear",
279
  }
280
 
 
 
 
 
 
 
281
  # Import preset configs for Advanced Settings defaults
282
  from obliteratus.abliterate import METHODS as _PRESET_CONFIGS # noqa: E402
283
  from obliteratus.prompts import ( # noqa: E402
@@ -383,10 +389,10 @@ def _validate_hub_repo(hub_repo: str) -> str:
383
  "Invalid repo format — use `username/model-name` "
384
  "(letters, numbers, hyphens, dots only)"
385
  )
386
- if not os.environ.get("HF_TOKEN"):
387
  warnings.append(
388
- "HF_TOKEN not set — push to Hub will fail. "
389
- "Set it via: `export HF_TOKEN=hf_...`"
390
  )
391
  if warnings:
392
  return "**Warning:** " + " | ".join(warnings)
@@ -1600,7 +1606,11 @@ def obliterate(model_choice: str, method_choice: str,
1600
  )
1601
  return
1602
 
1603
- # Early validation: Hub repo format + HF_TOKEN
 
 
 
 
1604
  if push_to_hub:
1605
  if push_to_hub != "auto" and not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
1606
  yield (
@@ -1608,10 +1618,10 @@ def obliterate(model_choice: str, method_choice: str,
1608
  "", gr.update(), gr.update(), gr.update(), gr.update(),
1609
  )
1610
  return
1611
- if not os.environ.get("HF_TOKEN"):
1612
  yield (
1613
- "**Error:** HF_TOKEN not set. Push to Hub requires a write token. "
1614
- "Set it via `export HF_TOKEN=hf_...` or in your Space secrets.",
1615
  "", gr.update(), gr.update(), gr.update(), gr.update(),
1616
  )
1617
  return
@@ -1690,6 +1700,8 @@ def obliterate(model_choice: str, method_choice: str,
1690
  device="auto",
1691
  dtype="float16",
1692
  push_to_hub=push_to_hub,
 
 
1693
  quantization=quantization,
1694
  trust_remote_code=is_preset,
1695
  harmful_prompts=harmful_all[:n],
@@ -1708,6 +1720,8 @@ def obliterate(model_choice: str, method_choice: str,
1708
  dtype="float16",
1709
  method=method,
1710
  push_to_hub=push_to_hub,
 
 
1711
  quantization=quantization,
1712
  trust_remote_code=is_preset,
1713
  harmful_prompts=harmful_all[:n],
@@ -1762,7 +1776,8 @@ def obliterate(model_choice: str, method_choice: str,
1762
  log_lines.append(f"Prompt volume: {vol_label} pairs")
1763
  if push_to_hub:
1764
  if push_to_hub == "auto":
1765
- log_lines.append("Push to Hub: auto ({you}/{model}-OBLITERATED)")
 
1766
  else:
1767
  log_lines.append(f"Push to Hub: {push_to_hub}")
1768
  if quantization:
@@ -3512,8 +3527,8 @@ with gr.Blocks(theme=THEME, css=CSS, js=_JS, title="OBLITERATUS", fill_height=Tr
3512
  hub_auto_push = gr.Checkbox(
3513
  label="Auto-push to Hub",
3514
  value=False,
3515
- info="Auto-names your model {you}/{model}-OBLITERATED and pushes to HF Hub. "
3516
- "Requires HF_TOKEN env var with write access.",
3517
  )
3518
  hub_repo = gr.Textbox(
3519
  label="Push to Hub (optional override)",
@@ -4237,11 +4252,13 @@ The winner gets auto-pushed to HuggingFace Hub.
4237
  def on_round(rnd):
4238
  pass # logged via on_log
4239
 
 
 
4240
  runner = TourneyRunner(
4241
  model_name=model_id,
4242
  hub_org=hub_org_val,
4243
  hub_repo=hub_repo_val,
4244
- dataset_key=dataset or "builtin",
4245
  quantization=quant,
4246
  on_log=on_log,
4247
  on_round=on_round,
 
278
  "nuclear (maximum force combo)": "nuclear",
279
  }
280
 
281
+ # ── Community Hub push ────────────────────────────────────────────────
282
+ # Shared org + token so users can auto-push without their own HF_TOKEN.
283
+ # Set OBLITERATUS_HUB_TOKEN as a Space secret with write access to the org.
284
+ _HUB_COMMUNITY_ORG = os.environ.get("OBLITERATUS_HUB_ORG", "OBLITERATUS-community")
285
+ _HUB_COMMUNITY_TOKEN = os.environ.get("OBLITERATUS_HUB_TOKEN")
286
+
287
  # Import preset configs for Advanced Settings defaults
288
  from obliteratus.abliterate import METHODS as _PRESET_CONFIGS # noqa: E402
289
  from obliteratus.prompts import ( # noqa: E402
 
389
  "Invalid repo format — use `username/model-name` "
390
  "(letters, numbers, hyphens, dots only)"
391
  )
392
+ if not os.environ.get("HF_TOKEN") and not _HUB_COMMUNITY_TOKEN:
393
  warnings.append(
394
+ "No Hub token available — push will fail. "
395
+ "Set HF_TOKEN or OBLITERATUS_HUB_TOKEN."
396
  )
397
  if warnings:
398
  return "**Warning:** " + " | ".join(warnings)
 
1606
  )
1607
  return
1608
 
1609
+ # Early validation: Hub repo format + token availability
1610
+ # Resolve which token to use: user's own HF_TOKEN, or the shared community token.
1611
+ _user_token = os.environ.get("HF_TOKEN")
1612
+ _hub_token = _user_token or _HUB_COMMUNITY_TOKEN
1613
+ _hub_org = None if _user_token else _HUB_COMMUNITY_ORG # community org only when using shared token
1614
  if push_to_hub:
1615
  if push_to_hub != "auto" and not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
1616
  yield (
 
1618
  "", gr.update(), gr.update(), gr.update(), gr.update(),
1619
  )
1620
  return
1621
+ if not _hub_token:
1622
  yield (
1623
+ "**Error:** No Hub token available. Set HF_TOKEN or OBLITERATUS_HUB_TOKEN "
1624
+ "as an environment variable or Space secret.",
1625
  "", gr.update(), gr.update(), gr.update(), gr.update(),
1626
  )
1627
  return
 
1700
  device="auto",
1701
  dtype="float16",
1702
  push_to_hub=push_to_hub,
1703
+ hub_token=_hub_token,
1704
+ hub_community_org=_hub_org,
1705
  quantization=quantization,
1706
  trust_remote_code=is_preset,
1707
  harmful_prompts=harmful_all[:n],
 
1720
  dtype="float16",
1721
  method=method,
1722
  push_to_hub=push_to_hub,
1723
+ hub_token=_hub_token,
1724
+ hub_community_org=_hub_org,
1725
  quantization=quantization,
1726
  trust_remote_code=is_preset,
1727
  harmful_prompts=harmful_all[:n],
 
1776
  log_lines.append(f"Prompt volume: {vol_label} pairs")
1777
  if push_to_hub:
1778
  if push_to_hub == "auto":
1779
+ _ns = _hub_org or "{you}"
1780
+ log_lines.append(f"Push to Hub: auto ({_ns}/{{model}}-OBLITERATED)")
1781
  else:
1782
  log_lines.append(f"Push to Hub: {push_to_hub}")
1783
  if quantization:
 
3527
  hub_auto_push = gr.Checkbox(
3528
  label="Auto-push to Hub",
3529
  value=False,
3530
+ info=f"Pushes your model to {_HUB_COMMUNITY_ORG}/{{model}}-OBLITERATED on HF Hub. "
3531
+ "No token needed — works out of the box!",
3532
  )
3533
  hub_repo = gr.Textbox(
3534
  label="Push to Hub (optional override)",
 
4252
  def on_round(rnd):
4253
  pass # logged via on_log
4254
 
4255
+ dataset_key = get_source_key_from_label(dataset) if dataset else "builtin"
4256
+
4257
  runner = TourneyRunner(
4258
  model_name=model_id,
4259
  hub_org=hub_org_val,
4260
  hub_repo=hub_repo_val,
4261
+ dataset_key=dataset_key,
4262
  quantization=quant,
4263
  on_log=on_log,
4264
  on_round=on_round,
obliteratus/abliterate.py CHANGED
@@ -504,21 +504,22 @@ class StageResult:
504
  details: dict[str, Any] = field(default_factory=dict)
505
 
506
 
507
- def auto_hub_repo_id(model_name: str, *, api=None) -> str:
508
- """Generate a Hub repo ID like ``{hf_user}/{short_model}-OBLITERATED``.
509
 
510
- Resolves the authenticated HF username via the API, and extracts a
511
- short model name (e.g. ``meta-llama/Llama-3-8B`` → ``Llama-3-8B``).
512
  """
513
  import re
514
 
515
- if api is None:
516
- from huggingface_hub import HfApi
517
- api = HfApi()
518
-
519
- # Resolve HF username from token
520
- user_info = api.whoami()
521
- username = user_info.get("name") or user_info.get("user", "unknown")
 
522
 
523
  # Extract short model name (part after '/')
524
  short = model_name.split("/")[-1] if "/" in model_name else model_name
@@ -526,7 +527,7 @@ def auto_hub_repo_id(model_name: str, *, api=None) -> str:
526
  short = re.sub(r"[^a-zA-Z0-9\-.]", "-", short)
527
  short = re.sub(r"-+", "-", short).strip("-")
528
 
529
- return f"{username}/{short}-OBLITERATED"
530
 
531
 
532
  # ── Main pipeline ───────────────────────────────────────────────────────
@@ -559,6 +560,8 @@ class AbliterationPipeline:
559
  trust_remote_code: bool = False,
560
  method: str = "advanced",
561
  push_to_hub: str | None = None,
 
 
562
  n_directions: int | None = None,
563
  norm_preserve: bool | None = None,
564
  regularization: float | None = None,
@@ -618,6 +621,8 @@ class AbliterationPipeline:
618
  self.trust_remote_code = trust_remote_code
619
  self.large_model_mode = large_model_mode
620
  self.push_to_hub = push_to_hub
 
 
621
  self.harmful_prompts = list(harmful_prompts) if harmful_prompts is not None else list(HARMFUL_PROMPTS)
622
  self.harmless_prompts = list(harmless_prompts) if harmless_prompts is not None else list(HARMLESS_PROMPTS)
623
  if not self.harmful_prompts:
@@ -5948,11 +5953,13 @@ class AbliterationPipeline:
5948
  if self.push_to_hub:
5949
  from huggingface_hub import HfApi
5950
 
5951
- api = HfApi()
5952
 
5953
- # Resolve "auto" → {hf_user}/{short_model}-OBLITERATED
5954
  if self.push_to_hub == "auto":
5955
- repo_id = auto_hub_repo_id(self.model_name, api=api)
 
 
5956
  self.log(f"Auto-named Hub repo: {repo_id}")
5957
  else:
5958
  repo_id = self.push_to_hub
 
504
  details: dict[str, Any] = field(default_factory=dict)
505
 
506
 
507
+ def auto_hub_repo_id(model_name: str, *, api=None, org: str | None = None) -> str:
508
+ """Generate a Hub repo ID like ``{namespace}/{short_model}-OBLITERATED``.
509
 
510
+ If *org* is given, uses that as the namespace (e.g. a shared community org).
511
+ Otherwise resolves the authenticated HF username via the API.
512
  """
513
  import re
514
 
515
+ if org:
516
+ namespace = org
517
+ else:
518
+ if api is None:
519
+ from huggingface_hub import HfApi
520
+ api = HfApi()
521
+ user_info = api.whoami()
522
+ namespace = user_info.get("name") or user_info.get("user", "unknown")
523
 
524
  # Extract short model name (part after '/')
525
  short = model_name.split("/")[-1] if "/" in model_name else model_name
 
527
  short = re.sub(r"[^a-zA-Z0-9\-.]", "-", short)
528
  short = re.sub(r"-+", "-", short).strip("-")
529
 
530
+ return f"{namespace}/{short}-OBLITERATED"
531
 
532
 
533
  # ── Main pipeline ───────────────────────────────────────────────────────
 
560
  trust_remote_code: bool = False,
561
  method: str = "advanced",
562
  push_to_hub: str | None = None,
563
+ hub_token: str | None = None,
564
+ hub_community_org: str | None = None,
565
  n_directions: int | None = None,
566
  norm_preserve: bool | None = None,
567
  regularization: float | None = None,
 
621
  self.trust_remote_code = trust_remote_code
622
  self.large_model_mode = large_model_mode
623
  self.push_to_hub = push_to_hub
624
+ self.hub_token = hub_token
625
+ self.hub_community_org = hub_community_org
626
  self.harmful_prompts = list(harmful_prompts) if harmful_prompts is not None else list(HARMFUL_PROMPTS)
627
  self.harmless_prompts = list(harmless_prompts) if harmless_prompts is not None else list(HARMLESS_PROMPTS)
628
  if not self.harmful_prompts:
 
5953
  if self.push_to_hub:
5954
  from huggingface_hub import HfApi
5955
 
5956
+ api = HfApi(token=self.hub_token) if self.hub_token else HfApi()
5957
 
5958
+ # Resolve "auto" → {namespace}/{short_model}-OBLITERATED
5959
  if self.push_to_hub == "auto":
5960
+ repo_id = auto_hub_repo_id(
5961
+ self.model_name, api=api, org=self.hub_community_org,
5962
+ )
5963
  self.log(f"Auto-named Hub repo: {repo_id}")
5964
  else:
5965
  repo_id = self.push_to_hub
obliteratus/informed_pipeline.py CHANGED
@@ -181,6 +181,8 @@ class InformedAbliterationPipeline(AbliterationPipeline):
181
  on_log: Callable[[str], None] | None = None,
182
  # Base pipeline kwargs forwarded to AbliterationPipeline
183
  push_to_hub: str | None = None,
 
 
184
  quantization: str | None = None,
185
  # Analysis configuration
186
  run_cone_analysis: bool = True,
@@ -212,6 +214,8 @@ class InformedAbliterationPipeline(AbliterationPipeline):
212
  on_stage=on_stage,
213
  on_log=on_log,
214
  push_to_hub=push_to_hub,
 
 
215
  quantization=quantization,
216
  # Set informed defaults
217
  norm_preserve=True,
 
181
  on_log: Callable[[str], None] | None = None,
182
  # Base pipeline kwargs forwarded to AbliterationPipeline
183
  push_to_hub: str | None = None,
184
+ hub_token: str | None = None,
185
+ hub_community_org: str | None = None,
186
  quantization: str | None = None,
187
  # Analysis configuration
188
  run_cone_analysis: bool = True,
 
214
  on_stage=on_stage,
215
  on_log=on_log,
216
  push_to_hub=push_to_hub,
217
+ hub_token=hub_token,
218
+ hub_community_org=hub_community_org,
219
  quantization=quantization,
220
  # Set informed defaults
221
  norm_preserve=True,