freeCS-dot-org committed on
Commit
8ce7acd
·
1 Parent(s): 25a1006

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +174 -0
README.md CHANGED
@@ -1,3 +1,177 @@
1
  ---
2
  license: unknown
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: unknown
3
+ inference: false
4
+ datasets:
5
+ - vicgalle/alpaca-gpt4
6
+ base_model: microsoft/phi-2
7
  ---
8
+ ---
9
+ # Model Card: Phine-2-v0
10
+
11
+ ## Overview
12
+
13
+ - **Model Name:** Phine-2
14
+ - **Base Model:** Phi-2 (Microsoft model)
15
+ - **Created By:** [GR](https://twitter.com/gr_username)
16
+ - **Donations Link:** [Click Me](https://www.buymeacoffee.com/gr.0)
17
+
18
+ ## Model Information
19
+
20
+ This model has been finetuned using the approach described in the paper: "Reasoning Is All You Need".
21
+
22
+ The input structure is the following: `<|system|>sys_message\n<|prompt|>prompt\n<|reasoning|>reasoning\n<|response|>response<|endoftext|>`
23
+
24
+ ## Code Usage
25
+
26
+ To try the model, use the following Python code snippet:
27
+
28
+ ```python
29
+ #######################
30
+ '''
31
+ License: MIT
32
+ '''
33
+ #######################
34
+
35
+
36
+ ##### Dependencies
37
+
38
+ """ IMPORTANT: Uncomment the following line if you are in a Colab/Notebook environment """
39
+
40
+ #!pip install gradio einops accelerate bitsandbytes transformers
41
+
42
+ #####
43
+
44
+ import gradio as gr
45
+ import transformers
46
+ from transformers import AutoTokenizer, AutoModelForCausalLM
47
+ import torch
48
+ import random
49
+ import re
50
+
51
def cut_text_after_last_token(text, token):
    """Return the substring after the final occurrence of *token*.

    The result is whitespace-stripped; ``None`` is returned when *token*
    does not occur in *text* at all.
    """
    idx = text.rfind(token)
    if idx == -1:
        return None
    return text[idx + len(token):].strip()
60
+
61
+
62
class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    """Stop generation once a sentinel token sequence appears in the output.

    Only tokens generated after ``starting_idx`` (i.e. past the prompt)
    are inspected, so sentinel text inside the prompt never triggers a stop.
    """

    def __init__(self, sentinel_token_ids: torch.LongTensor,
                 starting_idx: int):
        super().__init__()
        self.sentinel_token_ids = sentinel_token_ids
        self.starting_idx = starting_idx

    def __call__(self, input_ids: torch.LongTensor,
                 _scores: torch.FloatTensor) -> bool:
        sentinel_len = self.sentinel_token_ids.shape[-1]
        for sample in input_ids:
            generated = sample[self.starting_idx:]
            # Not enough new tokens yet to contain the sentinel.
            if generated.shape[-1] < sentinel_len:
                continue
            # Slide a sentinel-sized window over the generated suffix.
            for window in generated.unfold(0, sentinel_len, 1):
                if bool(torch.all(torch.eq(self.sentinel_token_ids, window))):
                    return True
        return False
83
+
84
+
85
+
86
+
87
+
88
# ----- Model setup -----
model_path = 'freecs/ArtificialThinker-Phi2'

# Prefer GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# remove .to() if load_in_4/8bit = True
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    load_in_4bit=False,
    torch_dtype=torch.float16,
).to(device)
95
+
96
+
97
+
98
def phine(message, history, temperature, top_p, top_k, repetition_penalty, sys_message):
    """Two-pass chat handler for the gradio ChatInterface.

    First generates a reasoning trace for *message*, then generates the
    final response conditioned on that reasoning. Returns markdown with
    the reasoning folded into a ``<details>`` block followed by the
    response.

    Parameters mirror the gradio additional_inputs: sampling temperature,
    nucleus/top-k settings, repetition penalty, and the system prompt.
    """

    # Rebuild the conversation context from gradio history, which is a
    # list of [user_message, bot_message] pairs. Even positions are user
    # turns; odd positions are bot turns whose previously-shown
    # <details> reasoning block is stripped back out.
    context = ""
    turn = 0
    for pair in history or []:
        for h in pair:
            if turn % 2 == 0:
                context += f"""\n<|prompt|>{h}\n"""
            else:
                cleaned = re.sub(re.compile(r'<details>.*?</details>'), '', h)
                context += f"""<|response|>{cleaned}"""
            turn += 1

    def _generate(prompt):
        # Tokenize the prompt, generate until <|endoftext|>, and decode.
        tokenized = tokenizer(prompt, return_tensors="pt").to(device)
        # Build the stopping criteria per call so starting_idx matches
        # THIS prompt's length. (The original built it once and reused a
        # stale starting_idx for the second, longer prompt, so part of
        # the prompt was scanned for the sentinel.)
        stopping = transformers.StoppingCriteriaList([
            _SentinelTokenStoppingCriteria(
                sentinel_token_ids=tokenizer(
                    "<|endoftext|>",
                    add_special_tokens=False,
                    return_tensors="pt",
                ).input_ids.to(device),
                starting_idx=tokenized.input_ids.shape[-1])
        ])
        out = model.generate(**tokenized,
                             stopping_criteria=stopping,
                             do_sample=True,
                             max_length=2048,
                             temperature=temperature,
                             top_p=top_p,
                             top_k=top_k,
                             repetition_penalty=repetition_penalty)
        return tokenizer.decode(out[0], skip_special_tokens=True)

    base = f"""\n<|system|>{sys_message}""" + context + "\n<|prompt|>" + message

    # Pass 1: generate the reasoning trace.
    completion = _generate(base + "\n<|reasoning|>")
    # cut_text_after_last_token returns None when the marker is missing;
    # fall back to "" so the concatenation below cannot raise TypeError.
    reasoning = cut_text_after_last_token(completion, "<|reasoning|>") or ""

    # Pass 2: generate the response conditioned on the reasoning.
    completion = _generate(base + "\n<|reasoning|>" + reasoning + "\n<|response|>")
    response = cut_text_after_last_token(completion, "<|response|>") or ""

    res = f"""<details><summary>Reasoning</summary>{reasoning}</details>\n\n{response}"""

    return res
162
+
163
+
164
# ----- Gradio UI -----
demo = gr.ChatInterface(
    phine,
    additional_inputs=[
        gr.Slider(0.1, 2.0, label="temperature", value=0.5),
        gr.Slider(0.1, 2.0, label="Top P", value=0.9),
        gr.Slider(1, 500, label="Top K", value=50),
        gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.1),
        gr.Textbox(label="System Prompt", max_lines=1, interactive=True,
                   value="You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart."),
    ],
)

if __name__ == "__main__":
    # If debug=True causes problems you can set it to False
    demo.queue().launch(share=True, debug=True)
176
+ ```
177
+ ---