freecs
/

ArtificialThinker-Phi2

 ---
 license: unknown
+inference: false
+datasets:
+- vicgalle/alpaca-gpt4
+base_model: microsoft/phi-2
 ---
+---
+# Model Card: Phine-2-v0
+## Overview
+- **Model Name:** Phine-2
+- **Base Model:** Phi-2 (Microsoft model)
+- **Created By:** [GR](https://twitter.com/gr_username)
+- **Donations Link:** [Click Me](https://www.buymeacoffee.com/gr.0)
+## Model Information
+This model has been fine-tuned using the approach described in the paper "Reasoning Is All You Need".
+The model expects input in the following format: `<|system|>sys_message\n<|prompt|>prompt\n<|reasoning|>reasoning\n<|response|>response<|endoftext|>`
+## Code Usage
+To try the model, use the following Python code snippet:
+```python
+#######################
+'''
+License: MIT
+'''
+#######################
+##### Dependencies
+""" IMPORTANT: Uncomment the following line if you are in a Colab/Notebook environment """
+#!pip install gradio einops accelerate bitsandbytes transformers
+#####
+import gradio as gr
+import transformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import random
+import re
+def cut_text_after_last_token(text, token):
+    last_occurrence = text.rfind(token)
+    if last_occurrence != -1:
+        result = text[last_occurrence + len(token):].strip()
+        return result
+    else:
+        return None
+class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
+    def __init__(self, sentinel_token_ids: torch.LongTensor,
+                 starting_idx: int):
+        transformers.StoppingCriteria.__init__(self)
+        self.sentinel_token_ids = sentinel_token_ids
+        self.starting_idx = starting_idx
+    def __call__(self, input_ids: torch.LongTensor,
+                 _scores: torch.FloatTensor) -> bool:
+        for sample in input_ids:
+            trimmed_sample = sample[self.starting_idx:]
+            if trimmed_sample.shape[-1] < self.sentinel_token_ids.shape[-1]:
+                continue
+            for window in trimmed_sample.unfold(
+                    0, self.sentinel_token_ids.shape[-1], 1):
+                if torch.all(torch.eq(self.sentinel_token_ids, window)):
+                    return True
+        return False
+model_path = 'freecs/ArtificialThinker-Phi2'
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=False, torch_dtype=torch.float16).to(device) #remove .to() if load_in_4/8bit = True
+def phine(message, history, temperature, top_p, top_k, repetition_penalty, sys_message):
+    n = 0
+    context = ""
+    if history and len(history) > 0:
+        for x in history:
+          for h in x:
+            if n%2 == 0:
+              context+=f"""\n<|prompt|>{h}\n"""
+            else:
+              pattern = re.compile(r'<details>.*?</details>')
+              result = re.sub(pattern, '', h)
+              context+=f"""<|response|>{result}"""
+            n+=1
+    else:
+        context = ""
+    prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"\n<|reasoning|>"
+    tokenized = tokenizer(prompt, return_tensors="pt").to(device)
+    stopping_criteria_list = transformers.StoppingCriteriaList([
+        _SentinelTokenStoppingCriteria(
+            sentinel_token_ids=tokenizer(
+                "<|endoftext|>",
+                add_special_tokens=False,
+                return_tensors="pt",
+            ).input_ids.to(device),
+            starting_idx=tokenized.input_ids.shape[-1])
+    ])
+    token = model.generate(**tokenized,
+                        stopping_criteria=stopping_criteria_list,
+                        do_sample=True,
+                        max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
+                           )
+    completion = tokenizer.decode(token[0], skip_special_tokens=True)
+    token = "<|reasoning|>"
+    reasoning = cut_text_after_last_token(completion, token)
+    prompt = f"""\n<|system|>{sys_message}"""+context+"\n<|prompt|>"+message+"\n<|reasoning|>"+reasoning+"\n<|response|>"
+    tokenized = tokenizer(prompt, return_tensors="pt").to(device)
+    token = model.generate(**tokenized,
+                        stopping_criteria=stopping_criteria_list,
+                        do_sample=True,
+                        max_length=2048, temperature=temperature, top_p=top_p, top_k = top_k, repetition_penalty = repetition_penalty
+                           )
+    completion = tokenizer.decode(token[0], skip_special_tokens=True)
+    token = "<|response|>"
+    response = cut_text_after_last_token(completion, token)
+    res = f"""<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}"""
+    return res
+demo = gr.ChatInterface(phine,
+                          additional_inputs=[
+                              gr.Slider(0.1, 2.0, label="temperature", value=0.5),
+                              gr.Slider(0.1, 2.0, label="Top P", value=0.9),
+                              gr.Slider(1, 500, label="Top K", value=50),
+                              gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.1),
+                              gr.Textbox(label="System Prompt",max_lines=1,interactive=True, value="You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart.")
+                          ]
+                          )
+if __name__ == "__main__":
+    demo.queue().launch(share=True, debug=True) #If debug=True causes problems you can set it to False
+```
+---