Kernels
TaehyunKim github-actions[bot] committed on
Commit
2d926c3
·
unverified ·
1 Parent(s): 5dde6fa

fix typo in readme (#7)

Browse files

* fix typo in readme

Signed-off-by: ca1207 <ca1207zzz@gmail.com>

* Add built binary [skip-build]

---------

Signed-off-by: ca1207 <ca1207zzz@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. benchmarks/README.md +3 -3
  2. build/torch27-cxx11-cu118-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} +1 -1
  3. build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
  4. build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py +3 -2
  5. build/torch27-cxx11-cu126-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} +1 -1
  6. build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  7. build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
  8. build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py +3 -2
  9. build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  10. build/torch27-cxx11-cu128-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} +1 -1
  11. build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  12. build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
  13. build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py +3 -2
  14. build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  15. build/{torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250907180255.abi3.so → torch27-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} +2 -2
  16. build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so +0 -3
  17. build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  18. build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py +3 -3
  19. build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py +3 -2
  20. build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  21. build/torch28-cxx11-cu126-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} +1 -1
  22. build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  23. build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
  24. build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py +3 -2
  25. build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  26. build/torch28-cxx11-cu128-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} +1 -1
  27. build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  28. build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
  29. build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py +3 -2
  30. build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  31. build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so +3 -0
  32. build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so +0 -3
  33. build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  34. build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py +3 -3
  35. build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py +3 -2
  36. build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  37. build/{torch27-cxx11-cu118-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so → torch28-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} +2 -2
  38. build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so +0 -3
  39. build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  40. build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py +3 -3
  41. build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py +3 -2
  42. build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_20250907180255.abi3.so +0 -3
  43. build/{torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so → torch28-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} +2 -2
  44. build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so +0 -3
  45. build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so +0 -3
  46. build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py +3 -3
  47. build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py +3 -2
  48. build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py +52 -0
  49. build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so +3 -0
  50. build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py +9 -0
benchmarks/README.md CHANGED
@@ -15,7 +15,7 @@ Results can be saved as **CSV files** or **plots**.
15
  ## Usage
16
 
17
  ```bash
18
- python main.py --case <CASE> [--plot] [--save-path <DIR>]
19
  ```
20
 
21
  - `--case` (required): one of `rms`, `add_rms`, `poly`, `mul_poly`
@@ -25,8 +25,8 @@ python main.py --case <CASE> [--plot] [--save-path <DIR>]
25
  ## Examples
26
 
27
  ```bash
28
- python main.py --case add_rms --save-path ./results/
29
- python main.py --case poly --plot --save-path ./plots/
30
  ```
31
 
32
  ## Output
 
15
  ## Usage
16
 
17
  ```bash
18
+ python run_cases.py --case <CASE> [--plot] [--save-path <DIR>]
19
  ```
20
 
21
  - `--case` (required): one of `rms`, `add_rms`, `poly`, `mul_poly`
 
25
  ## Examples
26
 
27
  ```bash
28
+ python run_cases.py --case add_rms --save-path ./results/
29
+ python run_cases.py --case poly --plot --save-path ./plots/
30
  ```
31
 
32
  ## Output
build/torch27-cxx11-cu118-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9ea7edc8b27f7983e20d615ab470cef6b82975afc214becfddfd05a867a839
3
  size 8600336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee7aed4f37c13c6accf42db53acad998b46dfc3c6fd70e976b552482a08118e
3
  size 8600336
build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch27-cxx11-cu126-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d3511410cdc288d2fafc500223ed2e625e360f50fa341809cf892fb2c822924
3
  size 8779000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06901740688c1d1eed4d91275d28864866ac4a768ea6b97d4894f15869f5cedd
3
  size 8779000
build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:caffcadbb99fbaa27e8a81d5ef508f2e1a798e7626d618c3cf5b0d387d2c8686
3
- size 4618624
 
 
 
 
build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bf0d2ab5ff5520704e0b0c959b61d0043d360cfd4335950e69677873a87e436
3
- size 12792112
 
 
 
 
build/torch27-cxx11-cu128-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25efc9c32e4bd6609a8326025aad861cbf79b544893755fe44519c9df7224c40
3
  size 13818872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b85e3e1f218392802dc8642b037f06ad7ac388d597ca9d1fb2b7314e9af5f30
3
  size 13818872
build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b7c6ece8e8d316c4cc5fe46b1cec4422b2f61e9bb7240af71a2b4a35975d8e6
3
- size 6676528
 
 
 
 
build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:640322a8fac8fd9d8e9f195a3034c4ee0f81ee1acf897fd7c482a84ce47a1bec
3
- size 4160688
 
 
 
 
build/{torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250907180255.abi3.so → torch27-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d21a85bf21aa74f1281541e658acfd4f4326d902efe3578b059eccf054443284
3
- size 8089696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:525a7d6eebafd2c18e2a15c17162b8328c5ade11f87fe9032a136e92c182e888
3
+ size 2764584
build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80d05690547f2842d416ebb85c9f830370373bc7e6c54ba08eec61b3690280f
3
- size 4386744
 
 
 
 
build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4be173820e2a4bf4b6b8de6b63faf6544b599d9b0583f650a940adaef4a048b3
3
- size 2899184
 
 
 
 
build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1768d8d5072ac06d937cb5332988c6b3bfaa191f72d1369a22d2c577e9a3bca2
3
- size 8215280
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:440f5c17a7ddaf73c506bbc84fd1405e2e188b8ceaf4977910608be6b91e89bf
3
  size 8730200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d8ee61b8e62210df21e40e31560b84c4e985ba448917a8e95bf44167cb9505
3
  size 8730200
build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb222449350310f90f7271f34fcf9052c9eec28021fee0348130a8f239a97bf4
3
- size 4571976
 
 
 
 
build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a572bd877980ab8c0331ca5682191cb5a2b1f05bc69ea493a9e24f7728ba3f
3
- size 12730840
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/activation/{_activation_e5e2eeb_dirty.abi3.so → _activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dfb6d468f9cef0239d4ea47f0a247fa721befc5b8db86e1cddfc25f1814b67a
3
  size 13770064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad1446a247f173104e280c0e38fef98cdb68aa005f76a555192b73397fdda7a
3
  size 13770064
build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:79be6527f579de1133e50a66310d7d0690649dcac63009a54b5e68809408f12a
3
- size 6634208
 
 
 
 
build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f15919c4cac697cde550af16256e338472400e50df751e93622350c7f626bc8
3
- size 12726208
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fddc0b356a5bae74d48638c35d3d0d566812a152d249534371c5e733c3d0dce
3
+ size 13753152
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0815a50e61497b357b2b90fc28602b3f53a25da1161edd2cb0b0fbebc7c62bf6
3
- size 13757248
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d95e4491d35cb022a6eaa2febbc555f203893f989a4fb1cc483b2632f141869
3
- size 6687456
 
 
 
 
build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e72d4bb4459a5da96ca5eda1d305237a361140f0e25360e3d20326a22f1b6d47
3
- size 4165584
 
 
 
 
build/{torch27-cxx11-cu118-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so → torch28-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd84c828d4c15e96d65d6c8f0eb7a945ee8167d92e978b2ebce03eeaf41e7fce
3
- size 4405112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bebd1aa6a3414be5d77c1306c50310d05df8d75a7ea6c3dbd2bb6bb41a98685
3
+ size 2765376
build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d404c88b72f1b6da551a64b3373395e80403a52ccff14fc401be3e8ee184d83
3
- size 4387536
 
 
 
 
build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:58116124bb2b5d11de2753dd0c30a1e4c84759f18599da7016c791bad37528e9
3
- size 2899984
 
 
 
 
build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_20250907180255.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3325c2748cf7a070383068995078f93f440cc95fbed491d00bd414cdd851376
3
- size 4171472
 
 
 
 
build/{torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so → torch28-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74d4955271509451b946495da75f69a0f978e7258b8303fe3c077e585c0d3e6a
3
- size 8272456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b998e99c3b26775a59f8bdfe2033fd7871ce4743f8234a4d8360c9867afdaf
3
+ size 2771080
build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8d52dee20ba3c4619f7c614984f656f34f32dd74ba6cf866cf80f32245117cf
3
- size 4393240
 
 
 
 
build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:65319d3d93ac3bf0f2939fa4e53ddfc8cd633b9e396cde3a97d63b9041ba03a7
3
- size 2885344
 
 
 
 
build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _activation_e5e2eeb_dirty
3
- ops = torch.ops._activation_e5e2eeb_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_activation_e5e2eeb_dirty::{op_name}"
 
1
  import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_activation_2f66548_dirty::{op_name}"
build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py CHANGED
@@ -70,8 +70,9 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
- ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
74
- weight, eps)
 
75
  input_grad = grad if need_in else None
76
  residual_grad = grad if need_res else None
77
 
 
70
  weight_grad = torch.empty_like(
71
  weight) if ctx.needs_input_grad[2] else None
72
 
73
+ ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
74
+ add_output_grad, add_output, weight,
75
+ eps)
76
  input_grad = grad if need_in else None
77
  residual_grad = grad if need_res else None
78
 
build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from . import layers
4
+ from ._ops import ops
5
+ from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
6
+ from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
7
+
8
+
9
+ def poly_norm(
10
+ x: torch.Tensor,
11
+ weight: torch.Tensor,
12
+ bias: torch.Tensor,
13
+ eps: float = 1e-6,
14
+ ) -> None:
15
+ return PolyNormFunction.apply(x, weight, bias, eps)
16
+
17
+
18
+ def fused_mul_poly_norm(
19
+ x: torch.Tensor,
20
+ mul: torch.Tensor,
21
+ weight: torch.Tensor,
22
+ bias: torch.Tensor,
23
+ eps: float = 1e-6,
24
+ ) -> None:
25
+ return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
26
+
27
+
28
+ def rms_norm(
29
+ x: torch.Tensor,
30
+ weight: torch.Tensor,
31
+ eps: float = 1e-6,
32
+ ) -> None:
33
+ return RMSNormFunction.apply(x, weight, eps)
34
+
35
+
36
+ def fused_add_rms_norm(
37
+ x: torch.Tensor,
38
+ residual: torch.Tensor,
39
+ weight: torch.Tensor,
40
+ eps: float = 1e-6,
41
+ ) -> None:
42
+ return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
43
+
44
+
45
+ __all__ = [
46
+ "poly_norm",
47
+ "fused_mul_poly_norm",
48
+ "rms_norm",
49
+ "fused_add_rms_norm",
50
+ "layers",
51
+ "ops",
52
+ ]
build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd8313bde2684cf629375637613278a7a0575272924a30a4a290fdbb339910c
3
+ size 8730176
build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _activation_2f66548_dirty
3
+ ops = torch.ops._activation_2f66548_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_activation_2f66548_dirty::{op_name}"