Spaces:
Running on Zero
Running on Zero
Upload 130 files
Browse files- app.py +28 -11
- obliteratus/abliterate.py +22 -15
- obliteratus/informed_pipeline.py +4 -0
app.py
CHANGED
|
@@ -278,6 +278,12 @@ METHODS = {
|
|
| 278 |
"nuclear (maximum force combo)": "nuclear",
|
| 279 |
}
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
# Import preset configs for Advanced Settings defaults
|
| 282 |
from obliteratus.abliterate import METHODS as _PRESET_CONFIGS # noqa: E402
|
| 283 |
from obliteratus.prompts import ( # noqa: E402
|
|
@@ -383,10 +389,10 @@ def _validate_hub_repo(hub_repo: str) -> str:
|
|
| 383 |
"Invalid repo format β use `username/model-name` "
|
| 384 |
"(letters, numbers, hyphens, dots only)"
|
| 385 |
)
|
| 386 |
-
if not os.environ.get("HF_TOKEN"):
|
| 387 |
warnings.append(
|
| 388 |
-
"
|
| 389 |
-
"Set
|
| 390 |
)
|
| 391 |
if warnings:
|
| 392 |
return "**Warning:** " + " | ".join(warnings)
|
|
@@ -1600,7 +1606,11 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1600 |
)
|
| 1601 |
return
|
| 1602 |
|
| 1603 |
-
# Early validation: Hub repo format +
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1604 |
if push_to_hub:
|
| 1605 |
if push_to_hub != "auto" and not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
|
| 1606 |
yield (
|
|
@@ -1608,10 +1618,10 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1608 |
"", gr.update(), gr.update(), gr.update(), gr.update(),
|
| 1609 |
)
|
| 1610 |
return
|
| 1611 |
-
if not
|
| 1612 |
yield (
|
| 1613 |
-
"**Error:**
|
| 1614 |
-
"
|
| 1615 |
"", gr.update(), gr.update(), gr.update(), gr.update(),
|
| 1616 |
)
|
| 1617 |
return
|
|
@@ -1690,6 +1700,8 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1690 |
device="auto",
|
| 1691 |
dtype="float16",
|
| 1692 |
push_to_hub=push_to_hub,
|
|
|
|
|
|
|
| 1693 |
quantization=quantization,
|
| 1694 |
trust_remote_code=is_preset,
|
| 1695 |
harmful_prompts=harmful_all[:n],
|
|
@@ -1708,6 +1720,8 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1708 |
dtype="float16",
|
| 1709 |
method=method,
|
| 1710 |
push_to_hub=push_to_hub,
|
|
|
|
|
|
|
| 1711 |
quantization=quantization,
|
| 1712 |
trust_remote_code=is_preset,
|
| 1713 |
harmful_prompts=harmful_all[:n],
|
|
@@ -1762,7 +1776,8 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 1762 |
log_lines.append(f"Prompt volume: {vol_label} pairs")
|
| 1763 |
if push_to_hub:
|
| 1764 |
if push_to_hub == "auto":
|
| 1765 |
-
|
|
|
|
| 1766 |
else:
|
| 1767 |
log_lines.append(f"Push to Hub: {push_to_hub}")
|
| 1768 |
if quantization:
|
|
@@ -3512,8 +3527,8 @@ with gr.Blocks(theme=THEME, css=CSS, js=_JS, title="OBLITERATUS", fill_height=Tr
|
|
| 3512 |
hub_auto_push = gr.Checkbox(
|
| 3513 |
label="Auto-push to Hub",
|
| 3514 |
value=False,
|
| 3515 |
-
info="
|
| 3516 |
-
"
|
| 3517 |
)
|
| 3518 |
hub_repo = gr.Textbox(
|
| 3519 |
label="Push to Hub (optional override)",
|
|
@@ -4237,11 +4252,13 @@ The winner gets auto-pushed to HuggingFace Hub.
|
|
| 4237 |
def on_round(rnd):
|
| 4238 |
pass # logged via on_log
|
| 4239 |
|
|
|
|
|
|
|
| 4240 |
runner = TourneyRunner(
|
| 4241 |
model_name=model_id,
|
| 4242 |
hub_org=hub_org_val,
|
| 4243 |
hub_repo=hub_repo_val,
|
| 4244 |
-
dataset_key=
|
| 4245 |
quantization=quant,
|
| 4246 |
on_log=on_log,
|
| 4247 |
on_round=on_round,
|
|
|
|
| 278 |
"nuclear (maximum force combo)": "nuclear",
|
| 279 |
}
|
| 280 |
|
| 281 |
+
# ── Community Hub push ────────────────────────────────────────────────
|
| 282 |
+
# Shared org + token so users can auto-push without their own HF_TOKEN.
|
| 283 |
+
# Set OBLITERATUS_HUB_TOKEN as a Space secret with write access to the org.
|
| 284 |
+
_HUB_COMMUNITY_ORG = os.environ.get("OBLITERATUS_HUB_ORG", "OBLITERATUS-community")
|
| 285 |
+
_HUB_COMMUNITY_TOKEN = os.environ.get("OBLITERATUS_HUB_TOKEN")
|
| 286 |
+
|
| 287 |
# Import preset configs for Advanced Settings defaults
|
| 288 |
from obliteratus.abliterate import METHODS as _PRESET_CONFIGS # noqa: E402
|
| 289 |
from obliteratus.prompts import ( # noqa: E402
|
|
|
|
| 389 |
"Invalid repo format β use `username/model-name` "
|
| 390 |
"(letters, numbers, hyphens, dots only)"
|
| 391 |
)
|
| 392 |
+
if not os.environ.get("HF_TOKEN") and not _HUB_COMMUNITY_TOKEN:
|
| 393 |
warnings.append(
|
| 394 |
+
"No Hub token available β push will fail. "
|
| 395 |
+
"Set HF_TOKEN or OBLITERATUS_HUB_TOKEN."
|
| 396 |
)
|
| 397 |
if warnings:
|
| 398 |
return "**Warning:** " + " | ".join(warnings)
|
|
|
|
| 1606 |
)
|
| 1607 |
return
|
| 1608 |
|
| 1609 |
+
# Early validation: Hub repo format + token availability
|
| 1610 |
+
# Resolve which token to use: user's own HF_TOKEN, or the shared community token.
|
| 1611 |
+
_user_token = os.environ.get("HF_TOKEN")
|
| 1612 |
+
_hub_token = _user_token or _HUB_COMMUNITY_TOKEN
|
| 1613 |
+
_hub_org = None if _user_token else _HUB_COMMUNITY_ORG # community org only when using shared token
|
| 1614 |
if push_to_hub:
|
| 1615 |
if push_to_hub != "auto" and not re.match(r'^[a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+$', push_to_hub):
|
| 1616 |
yield (
|
|
|
|
| 1618 |
"", gr.update(), gr.update(), gr.update(), gr.update(),
|
| 1619 |
)
|
| 1620 |
return
|
| 1621 |
+
if not _hub_token:
|
| 1622 |
yield (
|
| 1623 |
+
"**Error:** No Hub token available. Set HF_TOKEN or OBLITERATUS_HUB_TOKEN "
|
| 1624 |
+
"as an environment variable or Space secret.",
|
| 1625 |
"", gr.update(), gr.update(), gr.update(), gr.update(),
|
| 1626 |
)
|
| 1627 |
return
|
|
|
|
| 1700 |
device="auto",
|
| 1701 |
dtype="float16",
|
| 1702 |
push_to_hub=push_to_hub,
|
| 1703 |
+
hub_token=_hub_token,
|
| 1704 |
+
hub_community_org=_hub_org,
|
| 1705 |
quantization=quantization,
|
| 1706 |
trust_remote_code=is_preset,
|
| 1707 |
harmful_prompts=harmful_all[:n],
|
|
|
|
| 1720 |
dtype="float16",
|
| 1721 |
method=method,
|
| 1722 |
push_to_hub=push_to_hub,
|
| 1723 |
+
hub_token=_hub_token,
|
| 1724 |
+
hub_community_org=_hub_org,
|
| 1725 |
quantization=quantization,
|
| 1726 |
trust_remote_code=is_preset,
|
| 1727 |
harmful_prompts=harmful_all[:n],
|
|
|
|
| 1776 |
log_lines.append(f"Prompt volume: {vol_label} pairs")
|
| 1777 |
if push_to_hub:
|
| 1778 |
if push_to_hub == "auto":
|
| 1779 |
+
_ns = _hub_org or "{you}"
|
| 1780 |
+
log_lines.append(f"Push to Hub: auto ({_ns}/{{model}}-OBLITERATED)")
|
| 1781 |
else:
|
| 1782 |
log_lines.append(f"Push to Hub: {push_to_hub}")
|
| 1783 |
if quantization:
|
|
|
|
| 3527 |
hub_auto_push = gr.Checkbox(
|
| 3528 |
label="Auto-push to Hub",
|
| 3529 |
value=False,
|
| 3530 |
+
info=f"Pushes your model to {_HUB_COMMUNITY_ORG}/{{model}}-OBLITERATED on HF Hub. "
|
| 3531 |
+
"No token needed β works out of the box!",
|
| 3532 |
)
|
| 3533 |
hub_repo = gr.Textbox(
|
| 3534 |
label="Push to Hub (optional override)",
|
|
|
|
| 4252 |
def on_round(rnd):
|
| 4253 |
pass # logged via on_log
|
| 4254 |
|
| 4255 |
+
dataset_key = get_source_key_from_label(dataset) if dataset else "builtin"
|
| 4256 |
+
|
| 4257 |
runner = TourneyRunner(
|
| 4258 |
model_name=model_id,
|
| 4259 |
hub_org=hub_org_val,
|
| 4260 |
hub_repo=hub_repo_val,
|
| 4261 |
+
dataset_key=dataset_key,
|
| 4262 |
quantization=quant,
|
| 4263 |
on_log=on_log,
|
| 4264 |
on_round=on_round,
|
obliteratus/abliterate.py
CHANGED
|
@@ -504,21 +504,22 @@ class StageResult:
|
|
| 504 |
details: dict[str, Any] = field(default_factory=dict)
|
| 505 |
|
| 506 |
|
| 507 |
-
def auto_hub_repo_id(model_name: str, *, api=None) -> str:
|
| 508 |
-
"""Generate a Hub repo ID like ``{
|
| 509 |
|
| 510 |
-
|
| 511 |
-
|
| 512 |
"""
|
| 513 |
import re
|
| 514 |
|
| 515 |
-
if
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
|
|
|
| 522 |
|
| 523 |
# Extract short model name (part after '/')
|
| 524 |
short = model_name.split("/")[-1] if "/" in model_name else model_name
|
|
@@ -526,7 +527,7 @@ def auto_hub_repo_id(model_name: str, *, api=None) -> str:
|
|
| 526 |
short = re.sub(r"[^a-zA-Z0-9\-.]", "-", short)
|
| 527 |
short = re.sub(r"-+", "-", short).strip("-")
|
| 528 |
|
| 529 |
-
return f"{
|
| 530 |
|
| 531 |
|
| 532 |
# ── Main pipeline ───────────────────────────────────────────────────────
|
|
@@ -559,6 +560,8 @@ class AbliterationPipeline:
|
|
| 559 |
trust_remote_code: bool = False,
|
| 560 |
method: str = "advanced",
|
| 561 |
push_to_hub: str | None = None,
|
|
|
|
|
|
|
| 562 |
n_directions: int | None = None,
|
| 563 |
norm_preserve: bool | None = None,
|
| 564 |
regularization: float | None = None,
|
|
@@ -618,6 +621,8 @@ class AbliterationPipeline:
|
|
| 618 |
self.trust_remote_code = trust_remote_code
|
| 619 |
self.large_model_mode = large_model_mode
|
| 620 |
self.push_to_hub = push_to_hub
|
|
|
|
|
|
|
| 621 |
self.harmful_prompts = list(harmful_prompts) if harmful_prompts is not None else list(HARMFUL_PROMPTS)
|
| 622 |
self.harmless_prompts = list(harmless_prompts) if harmless_prompts is not None else list(HARMLESS_PROMPTS)
|
| 623 |
if not self.harmful_prompts:
|
|
@@ -5948,11 +5953,13 @@ class AbliterationPipeline:
|
|
| 5948 |
if self.push_to_hub:
|
| 5949 |
from huggingface_hub import HfApi
|
| 5950 |
|
| 5951 |
-
api = HfApi()
|
| 5952 |
|
| 5953 |
-
# Resolve "auto" β {
|
| 5954 |
if self.push_to_hub == "auto":
|
| 5955 |
-
repo_id = auto_hub_repo_id(
|
|
|
|
|
|
|
| 5956 |
self.log(f"Auto-named Hub repo: {repo_id}")
|
| 5957 |
else:
|
| 5958 |
repo_id = self.push_to_hub
|
|
|
|
| 504 |
details: dict[str, Any] = field(default_factory=dict)
|
| 505 |
|
| 506 |
|
| 507 |
+
def auto_hub_repo_id(model_name: str, *, api=None, org: str | None = None) -> str:
|
| 508 |
+
"""Generate a Hub repo ID like ``{namespace}/{short_model}-OBLITERATED``.
|
| 509 |
|
| 510 |
+
If *org* is given, uses that as the namespace (e.g. a shared community org).
|
| 511 |
+
Otherwise resolves the authenticated HF username via the API.
|
| 512 |
"""
|
| 513 |
import re
|
| 514 |
|
| 515 |
+
if org:
|
| 516 |
+
namespace = org
|
| 517 |
+
else:
|
| 518 |
+
if api is None:
|
| 519 |
+
from huggingface_hub import HfApi
|
| 520 |
+
api = HfApi()
|
| 521 |
+
user_info = api.whoami()
|
| 522 |
+
namespace = user_info.get("name") or user_info.get("user", "unknown")
|
| 523 |
|
| 524 |
# Extract short model name (part after '/')
|
| 525 |
short = model_name.split("/")[-1] if "/" in model_name else model_name
|
|
|
|
| 527 |
short = re.sub(r"[^a-zA-Z0-9\-.]", "-", short)
|
| 528 |
short = re.sub(r"-+", "-", short).strip("-")
|
| 529 |
|
| 530 |
+
return f"{namespace}/{short}-OBLITERATED"
|
| 531 |
|
| 532 |
|
| 533 |
# ── Main pipeline ───────────────────────────────────────────────────────
|
|
|
|
| 560 |
trust_remote_code: bool = False,
|
| 561 |
method: str = "advanced",
|
| 562 |
push_to_hub: str | None = None,
|
| 563 |
+
hub_token: str | None = None,
|
| 564 |
+
hub_community_org: str | None = None,
|
| 565 |
n_directions: int | None = None,
|
| 566 |
norm_preserve: bool | None = None,
|
| 567 |
regularization: float | None = None,
|
|
|
|
| 621 |
self.trust_remote_code = trust_remote_code
|
| 622 |
self.large_model_mode = large_model_mode
|
| 623 |
self.push_to_hub = push_to_hub
|
| 624 |
+
self.hub_token = hub_token
|
| 625 |
+
self.hub_community_org = hub_community_org
|
| 626 |
self.harmful_prompts = list(harmful_prompts) if harmful_prompts is not None else list(HARMFUL_PROMPTS)
|
| 627 |
self.harmless_prompts = list(harmless_prompts) if harmless_prompts is not None else list(HARMLESS_PROMPTS)
|
| 628 |
if not self.harmful_prompts:
|
|
|
|
| 5953 |
if self.push_to_hub:
|
| 5954 |
from huggingface_hub import HfApi
|
| 5955 |
|
| 5956 |
+
api = HfApi(token=self.hub_token) if self.hub_token else HfApi()
|
| 5957 |
|
| 5958 |
+
# Resolve "auto" → {namespace}/{short_model}-OBLITERATED
|
| 5959 |
if self.push_to_hub == "auto":
|
| 5960 |
+
repo_id = auto_hub_repo_id(
|
| 5961 |
+
self.model_name, api=api, org=self.hub_community_org,
|
| 5962 |
+
)
|
| 5963 |
self.log(f"Auto-named Hub repo: {repo_id}")
|
| 5964 |
else:
|
| 5965 |
repo_id = self.push_to_hub
|
obliteratus/informed_pipeline.py
CHANGED
|
@@ -181,6 +181,8 @@ class InformedAbliterationPipeline(AbliterationPipeline):
|
|
| 181 |
on_log: Callable[[str], None] | None = None,
|
| 182 |
# Base pipeline kwargs forwarded to AbliterationPipeline
|
| 183 |
push_to_hub: str | None = None,
|
|
|
|
|
|
|
| 184 |
quantization: str | None = None,
|
| 185 |
# Analysis configuration
|
| 186 |
run_cone_analysis: bool = True,
|
|
@@ -212,6 +214,8 @@ class InformedAbliterationPipeline(AbliterationPipeline):
|
|
| 212 |
on_stage=on_stage,
|
| 213 |
on_log=on_log,
|
| 214 |
push_to_hub=push_to_hub,
|
|
|
|
|
|
|
| 215 |
quantization=quantization,
|
| 216 |
# Set informed defaults
|
| 217 |
norm_preserve=True,
|
|
|
|
| 181 |
on_log: Callable[[str], None] | None = None,
|
| 182 |
# Base pipeline kwargs forwarded to AbliterationPipeline
|
| 183 |
push_to_hub: str | None = None,
|
| 184 |
+
hub_token: str | None = None,
|
| 185 |
+
hub_community_org: str | None = None,
|
| 186 |
quantization: str | None = None,
|
| 187 |
# Analysis configuration
|
| 188 |
run_cone_analysis: bool = True,
|
|
|
|
| 214 |
on_stage=on_stage,
|
| 215 |
on_log=on_log,
|
| 216 |
push_to_hub=push_to_hub,
|
| 217 |
+
hub_token=hub_token,
|
| 218 |
+
hub_community_org=hub_community_org,
|
| 219 |
quantization=quantization,
|
| 220 |
# Set informed defaults
|
| 221 |
norm_preserve=True,
|