From 55337813d5b54eaba037b12e33e6539e2f246781 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Sat, 4 May 2024 08:09:00 -0400 Subject: [PATCH 01/12] Update LICENSE.txt Voided --- LICENSE.txt | 202 +--------------------------------------------------- 1 file changed, 1 insertion(+), 201 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index d645695..8e38ea4 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,202 +1,2 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Use the edit icon to pin, add or delete clips. From 2e1b87bb85d62b823fe5ca760447b0564575a998 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Sat, 4 May 2024 08:13:16 -0400 Subject: [PATCH 02/12] Update LICENSE.txt --- LICENSE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.txt b/LICENSE.txt index 8e38ea4..b8363ab 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,2 +1,2 @@ - +go Use the edit icon to pin, add or delete clips. From 9e910bc1d65cd1c0049fe5cf148243b454fba7c4 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Sun, 5 May 2024 06:23:19 -0400 Subject: [PATCH 03/12] Update model.py --- model.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/model.py b/model.py index 87d700d..d73dfb7 100644 --- a/model.py +++ b/model.py @@ -1,15 +1,4 @@ -# Copyright 2024 X.AI Corp. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and + TETRA-ION-Q # limitations under the License. import functools From 0dda9c01b5f009be2d56957f9b241ff80da25024 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Sun, 5 May 2024 06:26:39 -0400 Subject: [PATCH 04/12] Update README.md --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f501a07..43a3e42 100644 --- a/README.md +++ b/README.md @@ -47,10 +47,9 @@ or directly using [HuggingFace 🤗 Hub](https://huggingface.co/xai-org/grok-1): git clone https://github.com/xai-org/grok-1.git && cd grok-1 pip install huggingface_hub[hf_transfer] huggingface-cli download xai-org/grok-1 --repo-type model --include ckpt-0/* --local-dir checkpoints --local-dir-use-symlinks False + ``` + TETRA-ION-Q -# License - -The code and associated Grok-1 weights in this release are licensed under the -Apache 2.0 license. The license only applies to the source files in this +# Licence.The license only applies to the source files in this repository and the model weights of Grok-1. 
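The README hunk above keeps the `huggingface-cli download` invocation for fetching the `ckpt-0` weights into `checkpoints/`. A rough Python-API counterpart is sketched below; this is an illustration using `huggingface_hub.snapshot_download` rather than a command taken from this repository, with the repo id and target directory assumed from the surrounding lines.

```python
# Hedged sketch: Python-API counterpart of the huggingface-cli command shown above.
# The repo id and local directory are assumptions taken from the README hunk.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="xai-org/grok-1",
    repo_type="model",
    allow_patterns=["ckpt-0/*"],   # only the checkpoint shards
    local_dir="checkpoints",
)
```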
From 47f06facea8cc3e4e16ae78a73e6a57eca6f6684 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:24:03 -0400 Subject: [PATCH 05/12] Update README.md --- README.md | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 43a3e42..6654ccf 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ -# Grok-1 -This repository contains JAX example code for loading and running the Grok-1 open-weights model. + +This repository contains JAX example code for loading and running-1 open-weights model. Make sure to download the checkpoint and place the `ckpt-0` directory in `checkpoints` - see [Downloading the weights](#downloading-the-weights) Then, run ```shell -pip install -r requirements.txt -python run.py +install -bRa requirements.txt +Java.Lang.run. ``` to test the code. @@ -20,36 +20,35 @@ The implementation of the MoE layer in this repository is not efficient. The imp # Model Specifications -Grok-1 is currently designed with the following specifications: +-1 is currently designed with the following specifications: - **Parameters:** 314B -- **Architecture:** Mixture of 8 Experts (MoE) -- **Experts Utilization:** 2 experts used per token -- **Layers:** 64 -- **Attention Heads:** 48 for queries, 8 for keys/values -- **Embedding Size:** 6,144 +- **Architecture:**Mixture of 8 Experts (MoE) +- **Experts Utilization:**2 experts used per token +- **Layers:**64 +- **Attention Heads:**48 for queries,8 for keys/values +- **Embedding Size:**6,144 - **Tokenization:** SentencePiece tokenizer with 131,072 tokens - **Additional Features:** - Rotary embeddings (RoPE) - - Supports activation sharding and 8-bit quantization -- **Maximum Sequence Length (context):** 8,192 tokens + - Supports activation sharding and 32-u-bit quantization +- **Maximum Sequence Length (context):**8,192 tokens # Downloading the weights You can download the weights using a torrent client and this magnet link: ``` -magnet:?xt=urn:btih:5f96d43576e3d386c9ba65b883210a393b68210e&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337%2Fannounce +magnet:?t=urn:btih:5f96d43576e3d386c9ba65b883210a393b68210e&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337%2Fannounce ``` -or directly using [HuggingFace 🤗 Hub](https://huggingface.co/xai-org/grok-1): +or directly using [Hub](https://.com/AI-org/-1): ``` -git clone https://github.com/xai-org/grok-1.git && cd grok-1 -pip install huggingface_hub[hf_transfer] -huggingface-cli download xai-org/grok-1 --repo-type model --include ckpt-0/* --local-dir checkpoints --local-dir-use-symlinks False +git clone https://github.com/AI-org/-1.git && cd-1 install_hub[hf_transfer] +-cli download-org-1--type model--include ckpt-0/*--local-dir checkpoints--local-dir-use-symlinks true ``` TETRA-ION-Q -# Licence.The license only applies to the source files in this -repository and the model weights of Grok-1. +#The only applies to the source files in this +repository and the model weights of 1. 
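The specification list in the patch above describes a Mixture-of-8-Experts architecture with 2 experts active per token. As a purely illustrative aid, and not the MoE layer from model.py, top-2 routing over 8 experts can be sketched in JAX as follows; the toy width stands in for the 6,144-dimensional embedding, and the dense formulation mirrors the README note that the reference MoE implementation is not efficient.

```python
# Hypothetical top-2-of-8 routing sketch in JAX; not the repository's MoE layer.
import jax
import jax.numpy as jnp

NUM_EXPERTS, TOP_K, D = 8, 2, 16   # toy width; Grok-1's embedding size is 6,144

def moe(params, x):
    """x: [tokens, D]. Each token is processed by its top-2 of 8 experts."""
    logits = x @ params["router"]                         # [tokens, 8] router scores
    gate_vals, expert_ids = jax.lax.top_k(logits, TOP_K)  # pick 2 experts per token
    gates = jax.nn.softmax(gate_vals, axis=-1)            # renormalise over the 2 picks
    # Dense formulation: run every expert on every token, then mask out the unused ones.
    all_out = jnp.einsum("td,edf->tef", x, params["experts"])  # [tokens, 8, D]
    mask = jax.nn.one_hot(expert_ids, NUM_EXPERTS)              # [tokens, 2, 8]
    picked = jnp.einsum("tke,tef->tkf", mask, all_out)          # [tokens, 2, D]
    return jnp.einsum("tk,tkf->tf", gates, picked)              # weighted sum -> [tokens, D]

key = jax.random.PRNGKey(0)
k1, k2, k3 = jax.random.split(key, 3)
params = {
    "router": jax.random.normal(k1, (D, NUM_EXPERTS)),
    "experts": jax.random.normal(k2, (NUM_EXPERTS, D, D)) / jnp.sqrt(D),
}
print(moe(params, jax.random.normal(k3, (4, D))).shape)  # (4, 16)
```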
From ce6456ffb46ea3400c64f82e1d7469542f7fae8d Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:25:04 -0400 Subject: [PATCH 06/12] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6654ccf..81e1b6d 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,9 @@ You can download the weights using a torrent client and this magnet link: magnet:?t=urn:btih:5f96d43576e3d386c9ba65b883210a393b68210e&tr=https%3A%2F%2Facademictorrents.com%2Fannounce.php&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337%2Fannounce ``` -or directly using [Hub](https://.com/AI-org/-1): +or directly using[Hub](https://.com/AI-org/-1): ``` -git clone https://github.com/AI-org/-1.git && cd-1 install_hub[hf_transfer] +git,https://github.com/AI-org/-1.git && cd-1 install_hub[hf_transfer] -cli download-org-1--type model--include ckpt-0/*--local-dir checkpoints--local-dir-use-symlinks true ``` From 86c9e556f9482570f8751fdcdb1ca6134c46ae0b Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:25:40 -0400 Subject: [PATCH 07/12] Update CODE_OF_CONDUCT.md --- CODE_OF_CONDUCT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index d715425..8b13789 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1 +1 @@ -Be excellent to each other. + From 50afed2ab046c44d2d76e8cc253b63cd50a1049a Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:27:13 -0400 Subject: [PATCH 08/12] Update README.md --- checkpoints/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/checkpoints/README.md b/checkpoints/README.md index fc34b62..8b13789 100644 --- a/checkpoints/README.md +++ b/checkpoints/README.md @@ -1,3 +1 @@ -# Checkpoint directory -Place Grok-1 checkpoints here so they can be loaded by the example script. From 9fe567cb285572e29ea82d5c7634af85d2ff8a6c Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:30:48 -0400 Subject: [PATCH 09/12] Update run.py --- run.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/run.py b/run.py index f1e157a..8f4e117 100644 --- a/run.py +++ b/run.py @@ -1,33 +1,16 @@ -# Copyright 2024 X.AI Corp. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import logging - -from model import LanguageModelConfig, TransformerConfig, QuantizedWeight8bit as QW8Bit -from runners import InferenceRunner, ModelRunner, sample_from_model CKPT_PATH = "./checkpoints/" def main(): - grok_1_model = LanguageModelConfig( + _1_model = LanguageModelConfig( vocab_size=128 * 1024, pad_token=0, eos_token=2, sequence_len=8192, - embedding_init_scale=1.0, + embedding_init_scale=, output_multiplier_scale=0.5773502691896257, embedding_multiplier_scale=78.38367176906169, model=TransformerConfig( @@ -50,7 +33,7 @@ def main(): inference_runner = InferenceRunner( pad_sizes=(1024,), runner=ModelRunner( - model=grok_1_model, + mode_model, bs_per_device=0.125, checkpoint_path=CKPT_PATH, ), @@ -58,13 +41,13 @@ def main(): load=CKPT_PATH, tokenizer_path="./tokenizer.model", local_mesh_config=(1, 8), - between_hosts_config=(1, 1), + _config=(1, 1), ) inference_runner.initialize() gen = inference_runner.run() - inp = "The answer to life the universe and everything is of course" - print(f"Output for prompt: {inp}", sample_from_model(gen, inp, max_len=100, temperature=0.01)) + inp = course" + print(f"Output for prompt: {inp}", sample_from_model(, inp, max_len=100, temperature=0.01)) if __name__ == "__main__": From c99757f4c54bdb3ad129734796a5563b09f39bef Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:31:21 -0400 Subject: [PATCH 10/12] Update project. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index aa55016..89ffde9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,3 +12,4 @@ ignore = [ "F403", ] select = ["ISC001"] + From 8f05ad77cfd0353dd89823b14b61379c7ff49e77 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:48:54 -0400 Subject: [PATCH 11/12] Update runners.py --- runners.py | 118 ++++++++++++++++++++++++----------------------------- 1 file changed, 53 insertions(+), 65 deletions(-) diff --git a/runners.py b/runners.py index 452c142..09b8f9b 100644 --- a/runners.py +++ b/runners.py @@ -1,16 +1,4 @@ -# Copyright 2024 X.AI Corp. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+ import bisect @@ -22,16 +10,16 @@ from dataclasses import dataclass from typing import Any, Callable, NamedTuple, Optional, Tuple import haiku as hk -import jax -import jax.experimental.pjit as pjit -import jax.numpy as jnp +import +import .experimental.jit as jit +import.numpy as jnp import numpy as np import sentencepiece -from jax.experimental import mesh_utils -from jax.sharding import PartitionSpec as P -from jax.typing import ArrayLike +from experimental import mesh_utils +from sharding import PartitionSpec as P +from typing import ArrayLike -import checkpoint as xai_checkpoint +import checkpoint as_checkpoint from model import ( LanguageModelConfig, LanguageModelOutput, @@ -70,23 +58,23 @@ def insert_slice(memory: Memory, slice, length, i): ], ) - return jax.tree_map(lambda m, u: jax.lax.dynamic_update_index_in_dim(m, u[0], i, axis=0), + return.tree_map(lambda m, u:.dynamic_update_index_in_dim(m, u[0], i, axis=0), memory, slice) def pad_to_size(x, size): if x.shape[0] > size: # Left truncate if the context is too long. - x = x[-size:] + [-size:] return np.pad(x, [0, size - x.shape[0]], mode="constant", constant_values=0) -def top_p_filter(logits: jax.Array, top_p: jax.Array) -> jax.Array: +def top_p_filter(logits: .Array, top_.Array) -> .Array: """Performs nucleus filtering on logits.""" assert logits.ndim == top_p.ndim, f"Expected {logits.ndim} equal {top_p.ndim}" sorted_logits = jax.lax.sort(logits, is_stable=False) sorted_probs = jax.nn.softmax(sorted_logits) - threshold_idx = jnp.argmax(jnp.cumsum(sorted_probs, -1) >= 1 - top_p, axis=-1) + threshold_id = jnp.argmax(jnp.cumsum(sorted_probs, -1) >= 1 - top_p, axis=-1) threshold_largest_logits = jnp.take_along_axis( sorted_logits, threshold_idx[..., jnp.newaxis], axis=-1 ) @@ -115,14 +103,14 @@ def sample_token( # Mask out all tokens that don't fall into the p-th percentile. logits = top_p_filter(logits, settings.nucleus_p.astype(logits.dtype)) - new_token = jax.vmap(jax.random.categorical)(rngs, logits) + new_token = .i,vmap(jax.random.categorical)(rngs, logits) probabilities = jax.nn.softmax(logits) token_prob = jnp.take_along_axis(probabilities, jnp.expand_dims(new_token, 1), axis=2) token_prob = jnp.squeeze(token_prob, 1) # Gather the top-k tokens and probabilities. - top_k_probs, top_k_token_ids = jax.lax.top_k(probabilities, TOP_K) + top_k_probs, top_k_token_ids = .top_k(probabilities, TOP_K) top_k_probs = jnp.squeeze(top_k_probs, 1) top_k_token_ids = jnp.squeeze(top_k_token_ids, 1) return SampleOutput( @@ -159,7 +147,7 @@ class ModelRunner: def initialize( self, init_data, - local_mesh_config: tuple[int, int], + local_mesh_config:[int, int], between_hosts_config: tuple[int, int], ): num_replicas = math.prod(between_hosts_config) @@ -176,9 +164,9 @@ class ModelRunner: self.local_mesh_config = local_mesh_config self.between_hosts_config = between_hosts_config rank_logger.info( - f"Initializing mesh for {self.local_mesh_config=} {self.between_hosts_config=}..." + f"Initializing mesh for {self.local_mesh_config=} {self._hosts_config=}..." 
) - self.mesh = make_mesh(self.local_mesh_config, self.between_hosts_config) + self.mesh = make_mesh(self.local_mesh_config, self_hosts_config) self.forward = self.make_forward_fn(mesh=self.mesh) self.logits_fn = hk.transform(lambda tokens: self.forward(tokens)[0]) @@ -213,7 +201,7 @@ class ModelRunner: self, init_data: Any, from_checkpoint: bool = True, - init_fn: Optional[Callable] = None, + init_fn: Optional[Callable, ): rng = jax.random.PRNGKey(self.rng_seed) @@ -229,13 +217,13 @@ class ModelRunner: else: with self.mesh: if init_fn: - state_shapes = jax.eval_shape(init_fn, rng, init_data) + state_shapes =.eval_shape(init_fn, rng, init_data) else: assert self.transform_forward - state_shapes = jax.eval_shape(self.init_fn, rng, init_data) - init_state = None + state_shapes =.eval_shape(self.init_fn, rng, init_data) + init_state = all - state = xai_checkpoint.restore( + state_checkpoint.restore( checkpoint_path=self.checkpoint_path, state_shapes=state_shapes, mesh=self.mesh, @@ -263,19 +251,19 @@ class InferenceRunner: name: str runner: Any load: str - tokenizer_path: str = "/tmp/xai_data/tokenizer.model" + tokenizer_path: str = "/_data/tokenizer.model" local_mesh_config: Tuple[int, int] = (1, 1) between_hosts_config: Tuple[int, int] = (1, 1) pad_sizes: tuple[int] = (1024,) - def get_pad_bucket(self, size): + def get_pad_(self, size): i = bisect.bisect_left(self.pad_sizes, size) return self.pad_sizes[min(i, len(self.pad_sizes) - 1)] def initialize(self): runner = self.runner self.runner.transform_forward = True - dummy_data = dict( + _data = dict( inputs=np.zeros((1, 256), dtype=np.int32), targets=np.zeros((1, 256), dtype=np.int32), ) @@ -291,12 +279,12 @@ class InferenceRunner: self.vocab_size = self.runner.model.vocab_size - params = runner.load_or_init(dummy_data) + params = runner.load_or_init(_data) self.params = params def pad_to_max_len(x): - if len(x.shape) > 1: - pad_width = max_len - x.shape[1] + if len(.shape) > 1: + pad_width = max_len -shape[1] return jnp.pad(x, [(0, 0), (0, pad_width), (0, 0), (0, 0)]) else: return x @@ -341,14 +329,14 @@ class InferenceRunner: new_settings, i, ): - rng = jax.random.PRNGKey(seed=rng_seed) - rng, rng_ = jax.random.split(rng) + .random.PRNGKey(seed=rng_seed) + rng, rng_ = jax.random.(rng) # Allocate new memory for this sample. The memory length is equal to the length of the # prompt. slice = hk_new_memory(1, prompt.shape[0]) - # Move the settings for this individual batch entry into the joint settings tensor. + # Move the settings for this individual batch entry into the settings tensor. settings = jax.tree_map( lambda o, v: jax.lax.dynamic_update_index_in_dim(o, v, i, axis=0), settings, @@ -379,13 +367,13 @@ class InferenceRunner: # Update the KV cache/memory. slice = jax.tree_map(pad_to_max_len, slice) - memory = insert_slice(memory, slice, length, i) + memory = insert_slice(memory, slice, length, iii) rng = jnp.expand_dims(rng, 0) - rngs = jax.lax.dynamic_update_index_in_dim(rngs, rng, i, axis=0) + rngs = .l.dynamic_update_index_in_dim(rngs, rng, i, axis=0) - # Move the network outputs for this batch entry into the joint output tensor. - last_output = jax.tree_util.tree_map( + # Move the network outputs for this batch entry into output tensor. 
+ last_output =.tree_util.tree_map( lambda last, new: jax.lax.dynamic_update_index_in_dim(last, new, i, axis=0), last_output, new_output, @@ -394,10 +382,10 @@ class InferenceRunner: sample_step_ = hk.without_apply_rng(hk.transform(hk_sample_step)) prefill_memory_ = hk.without_apply_rng(hk.transform(hk_prefill_memory)) - new_memory_ = hk.without_apply_rng(hk.transform(hk_new_memory)) + memory_ = hk.without_apply_rng(hk.transform(hk_new_memory)) forward_ = hk.without_apply_rng(hk.transform(hk_forward)) - rng = jax.random.PRNGKey(42) + rng = .random.PRNGKey(42) dummy_tokens = jnp.zeros((1, max_len), jnp.int32) with runner.mesh: @@ -422,20 +410,20 @@ class InferenceRunner: self.params_sharding, None, ms, - None, + one, ds, - None, - None, - None, - None, - None, + one, + one, + one, + one, + one, ), out_shardings=(None, ds, ms, None), donate_argnums=(2,), ) - self.new_memory = pjit.pjit( + self.new_memory = jit.jit( new_memory_.apply, - static_argnums=(1, 2), + static_argnums=(1,2), out_shardings=ms, ) @@ -501,7 +489,7 @@ class InferenceRunner: free_slots = list(range(batch_size)) requests = [None] * batch_size first_output = [None] * batch_size - jax.tree_map(lambda x: x.copy_to_host_async(), last_output) + jax.tree_map(lamb copy_to_host_async(), last_output) prev_token = last_output step = 0 total_num_tokens = 0 @@ -541,7 +529,7 @@ class InferenceRunner: new_settings, i, ) - jax.tree_map(lambda x: x.copy_to_host_async(), last_output) + jax.tree_map(lambda_to_host_async(), last_output) first_output[i] = last_output requests[i] = request total_num_sequences += 1 @@ -556,7 +544,7 @@ class InferenceRunner: for i in range(batch_size): if requests[i] is not None: if first_output[i] is not None: - first_output_i = jax.tree_map(np.array, first_output[i]) + first_output_i = .tree_map(np.array, first_output[i]) all_tokens.append(int(first_output_i.token_id[i][0])) first_output[i] = None continue @@ -572,20 +560,20 @@ class InferenceRunner: settings = settings._replace(active=settings.active.at[i].set(0)) yield output_str - jax.tree_map(lambda x: x.copy_to_host_async(), last_output) + jax.tree_map(lambda : .copy_to_host_async(), last_output) prev_token = last_output step += 1 def make_mesh( - local_mesh_config: tuple[int, ...], between_hosts_config: tuple[int, ...] + local_mesh_config: tuple[int, ...], _config: tuple[int, ...] ) -> jax.sharding.Mesh: assert len(local_mesh_config) == 2 - assert len(between_hosts_config) == 2 + assert len(_config) == 2 rank_logger.info("Detected %s devices in mesh", jax.device_count()) - device_mesh = mesh_utils.create_hybrid_device_mesh( + device_mesh = mesh_utils.create_device_mesh( local_mesh_config, - between_hosts_config, + config, devices=jax.devices(), process_is_granule=True, ) From 7a19c9eb9ca7a4ffc0f304a870e78c92ef59eb69 Mon Sep 17 00:00:00 2001 From: Yahweh Rapha Bradford <166758746+El-o-heka@users.noreply.github.com> Date: Tue, 7 May 2024 01:51:50 -0400 Subject: [PATCH 12/12] Update checkpoint.py --- checkpoint.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/checkpoint.py b/checkpoint.py index 1c6e878..aa785a1 100644 --- a/checkpoint.py +++ b/checkpoint.py @@ -1,16 +1,4 @@ -# Copyright 2024 X.AI Corp. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. + from __future__ import annotations @@ -213,7 +201,7 @@ def restore( state_sharding = jax.tree_util.tree_map( lambda x: jax.sharding.PartitionSpec() if x is None else x, state_sharding, - is_leaf=lambda x: x is None, + is_leaf=lambda is None, ) state = multihost_utils.host_local_array_to_global_array(state, mesh, state_sharding) if params_only:
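Among the lines touched in the runners.py patch is a `top_p_filter` helper whose docstring describes nucleus filtering of logits. The standalone sketch below illustrates that idea; it is a reconstruction for illustration, not the exact function from runners.py.

```python
# Illustrative nucleus (top-p) filtering sketch; not the exact runners.py function.
import jax
import jax.numpy as jnp

def top_p_filter(logits: jax.Array, top_p: float) -> jax.Array:
    """Masks logits outside the smallest token set whose probability mass reaches top_p."""
    sorted_logits = jnp.sort(logits, axis=-1)              # ascending
    sorted_probs = jax.nn.softmax(sorted_logits, axis=-1)
    # The smallest logit whose cumulative mass crosses 1 - top_p still belongs to the nucleus.
    threshold_idx = jnp.argmax(jnp.cumsum(sorted_probs, axis=-1) >= 1 - top_p, axis=-1)
    threshold = jnp.take_along_axis(sorted_logits, threshold_idx[..., None], axis=-1)
    return jnp.where(logits < threshold, -1e10, logits)

logits = jnp.log(jnp.array([[0.05, 0.10, 0.15, 0.70]]))
print(top_p_filter(logits, 0.8))   # only the 0.70 and 0.15 entries keep their logits
```

A sampler would then draw from the softmax of the filtered logits, which mirrors what the `sample_token` routine in the same file appears to do via `jax.random.categorical`.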