Ramkumarnn committed on
Commit
00e634a
·
0 Parent(s):

Blind navigation MVP

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pt filter=lfs diff=lfs merge=lfs -text
2
+ *.task filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ venv/
4
+ .gradio/
5
+ test_images/
6
+ results/
7
+ video_test/
8
+ new_images/
9
+ *.mp4
10
+ *.pptx
11
+ *.pdf
12
+ cert.pem
13
+ key.pem
14
+ # Old POC files
15
+ /blind_nav.py
16
+ /nav_dashboard.py
17
+ /video_pipeline.py
18
+ /full_pipeline.py
19
+ /app.py
20
+ /pose_slope_test.py
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blind Navigation Assistant
2
+
3
+ Vision-based navigation system for visually impaired users. Uses camera input to detect obstacles, estimate terrain slope, and provide real-time voice guidance.
4
+
5
+ ## Models Used
6
+ - **YOLOv8n** — obstacle detection
7
+ - **Depth Anything** — monocular depth → slope estimation
8
+ - **BlazePose** — gait analysis
9
+
10
+ ## Quick Start
11
+
12
+ ```bash
13
+ # Clone
14
+ git clone https://github.com/<your-username>/blind-nav.git
15
+ cd blind-nav
16
+
17
+ # Install
18
+ python3 -m venv venv
19
+ source venv/bin/activate
20
+ pip install -r requirements.txt
21
+ sudo apt-get install -y ffmpeg espeak
22
+
23
+ # Run
24
+ NAV_DEPTH_INPUT_SIZE=256 python app_live.py
25
+ ```
26
+
27
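+ Open: `http://<your-ip>:7860` on the same machine or LAN. From a phone, use the public `https://` share link printed at startup instead — browsers only grant camera access over HTTPS (or localhost).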
28
+
29
+ ## EC2 Deployment
30
+
31
+ ```bash
32
+ git clone https://github.com/<your-username>/blind-nav.git
33
+ cd blind-nav
34
+ bash deploy_ec2.sh
35
+ cd ~/blind_nav
36
+ source venv/bin/activate
37
+ NAV_DEPTH_INPUT_SIZE=256 python app_live.py
38
+ ```
39
+
40
+ Ensure port 7860 is open in your EC2 security group.
41
+
42
+ ## Structure
43
+
44
+ ```
45
+ config.py — settings, model paths, thresholds
46
+ core/
47
+ detector.py — YOLO + ByteTrack
48
+ depth.py — Depth Anything + slope smoothing
49
+ pose.py — BlazePose gait
50
+ risk_engine.py — contextual risk fusion
51
+ guidance.py — voice guidance + throttling
52
+ tts.py — live TTS (edge-tts/espeak)
53
+ tts_render.py — offline TTS baking into video
54
+ camera.py — threaded camera stream
55
+ renderers/
56
+ overlay.py — HUD overlay
57
+ blind_nav.py — 2-panel view
58
+ pipeline.py — unified video/image/webcam pipeline
59
+ app_live.py — Gradio live streaming UI
60
+ app_mobile.py — OpenCV mobile camera UI
61
+ ```
app_live.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live Phone Camera → Real-time Navigation Assistant.
2
+
3
+ Phone streams camera on left, annotated output appears on right.
4
+ Uses HTTPS so browser allows camera access.
5
+ """
6
+ import gradio as gr
7
+ import cv2
8
+ import numpy as np
9
+ from PIL import Image as PILImage
10
+ import os, sys, math, time
11
+
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+ from core import detector, depth, pose, risk_engine
14
+ from core.depth import SlopeSmoother
15
+ from core.detector import ObstacleTracker
16
+ from core.guidance import GuidanceEngine
17
+
18
+ # Persistent state across frames
19
+ _st = {}
20
+
21
def _init():
    """Populate the shared stream state the first time a frame arrives.

    A no-op once the 'ready' marker is present in ``_st``.
    """
    if 'ready' in _st:
        return
    _st.update({
        'sm': SlopeSmoother(),
        'tr': ObstacleTracker(),
        'gu': GuidanceEngine(),
        'dn': None,   # most recent depth estimate, filled in by process_frame
        'n': 0,       # frames processed so far
        'ready': True,
    })
29
+
30
def _reset():
    """Forget all stream state and return (image, status) values for the UI."""
    _st.clear()
    blank_image = None
    message = "Reset. Point your camera and start."
    return blank_image, message
33
+
34
+
35
def _draw_obstacles(out, obs):
    """Shade, outline, label and arrow-mark every obstacle on `out` (BGR, in place)."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)
    for ob in obs:
        x1, y1, x2, y2 = ob['box']
        u = ob['proximity']
        near = u > 0.7
        color = (0, 0, 255) if near else (0, 180, 255) if u > 0.4 else (0, 200, 0)

        # Translucent fill: draw a solid box on a copy, then blend it back.
        fill_w, frame_w = (0.35, 0.65) if near else (0.2, 0.8)
        ov = out.copy()
        cv2.rectangle(ov, (x1, y1), (x2, y2), color, -1)
        cv2.addWeighted(ov, fill_w, out, frame_w, 0, out)
        cv2.rectangle(out, (x1, y1), (x2, y2), color, 3)

        # Caption on a solid strip just above the box.
        lbl = f'{ob["label"].upper()} {ob["dist"]}'
        (tw, th), _ = cv2.getTextSize(lbl, font, 0.6, 2)
        cv2.rectangle(out, (x1, y1 - th - 10), (x1 + tw + 6, y1), color, -1)
        cv2.putText(out, lbl, (x1 + 3, y1 - 5), font, 0.6, white, 2)

        cx, cy = ob['center']
        if ob['direction'] == 'LEFT':
            cv2.arrowedLine(out, (cx + 50, cy), (cx - 50, cy), white, 3, tipLength=0.4)
        elif ob['direction'] == 'RIGHT':
            cv2.arrowedLine(out, (cx - 50, cy), (cx + 50, cy), white, 3, tipLength=0.4)
        else:
            cv2.arrowedLine(out, (cx, cy - 40), (cx, cy + 40), (0, 0, 255), 3, tipLength=0.4)
        if near:
            cv2.putText(out, '!! CLOSE !!', (cx - 45, y2 + 18), font, 0.5, (0, 0, 255), 2)


def _draw_path(out, obs, w, h):
    """Draw the walking-path hint: CLEAR corridor, GO LEFT / GO RIGHT, or STOP."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    green = (0, 255, 0)
    # An obstacle blocks the path when its box reaches into the lower half.
    blocked = [o for o in obs if o['box'][3] > h // 2]
    if not blocked:
        pts = np.array([[w // 4 + 30, h // 2], [3 * w // 4 - 30, h // 2],
                        [3 * w // 4 + 20, h], [w // 4 - 20, h]])
        ov = out.copy()
        cv2.fillPoly(ov, [pts], (0, 120, 0))
        cv2.addWeighted(ov, 0.15, out, 0.85, 0, out)
        cv2.polylines(out, [pts], True, green, 2)
        cv2.putText(out, 'CLEAR', (w // 2 - 30, h // 2 + 30), font, 0.6, green, 2)
        return

    lb = any(o['box'][0] < w // 2 for o in blocked)
    rb = any(o['box'][2] > w // 2 for o in blocked)
    if not lb:
        cv2.arrowedLine(out, (w // 2, h * 3 // 4), (w // 6, h * 3 // 4), green, 4, tipLength=0.3)
        cv2.putText(out, 'GO LEFT', (20, h // 2 + 30), font, 0.8, green, 2)
    elif not rb:
        cv2.arrowedLine(out, (w // 2, h * 3 // 4), (5 * w // 6, h * 3 // 4), green, 4, tipLength=0.3)
        cv2.putText(out, 'GO RIGHT', (2 * w // 3 - 20, h // 2 + 30), font, 0.8, green, 2)
    else:
        cv2.putText(out, 'STOP', (w // 2 - 40, h // 2), font, 1.2, (0, 0, 255), 3)


def _draw_slope_arrow(out, sa, w, h):
    """Draw an arrow near the bottom centre tilted by `sa` degrees; skipped when |sa| <= 3."""
    if abs(sa) <= 3:
        return
    cx, cy = w // 2, h - 50
    ax = int(cx + math.cos(math.radians(sa)) * 40)
    ay = int(cy - math.sin(math.radians(sa)) * 40)
    cv2.arrowedLine(out, (cx, cy), (ax, ay), (0, 255, 255), 3, tipLength=0.3)


def _draw_hud(out, risk_d, guid, sd, sa, terrain, w, h):
    """Draw the top status bar (risk badge, slope, gait advice) and the bottom voice bar."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    r = risk_d['risk']
    rc = {'SAFE': (0, 180, 0), 'LOW': (0, 220, 0), 'MEDIUM': (0, 180, 220), 'HIGH': (0, 0, 220)}[r]
    cv2.rectangle(out, (0, 0), (w, 55), (0, 0, 0), -1)
    cv2.rectangle(out, (5, 5), (170, 32), rc, -1)
    cv2.putText(out, f'RISK: {r} ({risk_d["score"]})', (10, 26), font, 0.5, (255, 255, 255), 2)
    cv2.putText(out, f'{sd} {sa:.0f}deg {terrain}', (180, 26), font, 0.38, (200, 200, 200), 1)
    cv2.putText(out, f'Step:{guid["step"]} Lean:{guid["lean"]} Knee:{guid["knee_rec"]}',
                (5, 48), font, 0.33, (0, 255, 255), 1)

    # Voice bar: the spoken message, truncated to 50 characters.
    cv2.rectangle(out, (0, h - 30), (w, h), (20, 20, 40), -1)
    cv2.putText(out, guid['voice'][:50], (8, h - 10), font, 0.38, (255, 255, 255), 1)


def process_frame(frame):
    """Analyse one streamed camera frame and return the annotated view.

    Args:
        frame: RGB image from the Gradio webcam component (anything
            ``np.array`` accepts), or None while the camera is not streaming.

    Returns:
        ``(image, status)``: the annotated frame as a PIL image plus a Markdown
        status line, or ``(None, "Waiting for camera...")`` when `frame` is None.
    """
    if frame is None:
        return None, "Waiting for camera..."

    _init()
    # Convert once; the BGR copy is derived from the same array.
    rgb = np.array(frame)
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    h, w = img.shape[:2]

    obs = detector.detect(img, track=True)
    new_o, close_o, _ = _st['tr'].update(obs)
    gait, _lms, foot_y = pose.analyze(rgb, w, h)

    # Depth is refreshed on the first frame and then every 5th frame;
    # in between the smoother keeps its previous values.
    _st['n'] += 1
    if _st['n'] % 5 == 0 or _st['dn'] is None:
        _st['dn'] = depth.estimate_depth(rgb, h, w)
        rs, rd, rt, _ = depth.estimate_slope(_st['dn'], h, w, foot_y)
        _st['sm'].update(rs, rd, rt)

    sm = _st['sm']
    sa, sd = sm.angle, sm.direction

    risk_d = risk_engine.assess(
        sa, sd, gait, obs,
        slope_trend=sm.trend, new_obstacles=new_o, closing_obstacles=close_o)
    guid = _st['gu'].compute(
        sa, sd, sm.terrain, obs,
        slope_trend=sm.trend, new_obstacles=new_o, closing_obstacles=close_o)

    out = img.copy()
    _draw_obstacles(out, obs)
    _draw_path(out, obs, w, h)
    _draw_slope_arrow(out, sa, w, h)
    _draw_hud(out, risk_d, guid, sd, sa, sm.terrain, w, h)

    r = risk_d['risk']
    status = f"🔊 **{guid['voice']}**\n\n"
    status += f"Risk: **{r}** | Slope: {sd} {sa:.0f}° | Obstacles: {len(obs)} | Step: {guid['step']} | Lean: {guid['lean']}"

    return PILImage.fromarray(cv2.cvtColor(out, cv2.COLOR_BGR2RGB)), status
134
+
135
+
136
# Script injected into the page <head>: wraps getUserMedia so video requests
# *prefer* the rear ("environment") camera.
#  - `ideal` instead of `exact`: devices without a rear camera (laptops) still
#    get a stream instead of an OverconstrainedError rejection.
#  - the caller's own video constraints are merged, not replaced.
#  - does nothing when navigator.mediaDevices is unavailable (insecure context).
_REAR_CAMERA_HEAD = """
<script>
// Prefer rear camera on mobile
(() => {
    const md = navigator.mediaDevices;
    if (!md || !md.getUserMedia) { return; }
    const origGetUserMedia = md.getUserMedia.bind(md);
    md.getUserMedia = (constraints) => {
        if (constraints && constraints.video) {
            const base = (typeof constraints.video === 'object') ? constraints.video : {};
            constraints = Object.assign({}, constraints, {
                video: Object.assign({}, base, { facingMode: { ideal: 'environment' } }),
            });
        }
        return origGetUserMedia(constraints);
    };
})();
</script>
"""

with gr.Blocks(title="Live Navigation", head=_REAR_CAMERA_HEAD) as demo:
    gr.Markdown("# 🦯 Live Navigation Assistant\nPoint your phone camera → see real-time obstacle & path guidance")

    with gr.Row():
        cam_in = gr.Image(sources=["webcam"], streaming=True, type="pil",
                          label="📹 Live Camera")
        cam_out = gr.Image(label="🎯 Navigation View")

    status = gr.Markdown("Point camera and start streaming...")

    # Every streamed frame goes through process_frame; Reset clears its cached state.
    cam_in.stream(fn=process_frame, inputs=cam_in, outputs=[cam_out, status])

    gr.Button("🔄 Reset").click(fn=_reset, outputs=[cam_out, status])

if __name__ == "__main__":
    # Run each model once on a dummy image so the first real frame does not
    # pay the model-loading cost.
    print("Preloading models...", flush=True)
    dummy = np.zeros((100, 100, 3), dtype=np.uint8)
    detector.detect(dummy)
    depth.estimate_depth(dummy, 100, 100)
    pose.analyze(dummy, 100, 100)
    print("Models ready!", flush=True)

    # NOTE: share=True asks Gradio for a public HTTPS link; the module
    # docstring relies on HTTPS so the phone browser allows camera access.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
    )
app_mobile.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live Mobile Camera Navigation Assistant.
2
+
3
+ Connects to phone camera via IP Webcam app and provides
4
+ real-time obstacle detection, path analysis, slope estimation,
5
+ and voice guidance.
6
+
7
+ Usage:
8
+ 1. Install "IP Webcam" app on Android (or similar on iPhone)
9
+ 2. Start the app, note the URL (e.g., http://192.168.1.5:8080)
10
+ 3. Run: python app_mobile.py http://192.168.1.5:8080
11
+ Or: python app_mobile.py (uses laptop webcam)
12
+ """
13
+ import cv2
14
+ import numpy as np
15
+ import sys
16
+ import os
17
+ import time
18
+ import math
19
+
20
+ sys.path.insert(0, os.path.dirname(__file__))
21
+
22
+ from config import DEFAULT_DEPTH_EVERY
23
+ from core import detector, depth, pose, risk_engine
24
+ from core.depth import SlopeSmoother
25
+ from core.detector import ObstacleTracker
26
+ from core.guidance import GuidanceEngine
27
+ from core.tts import TTSEngine
28
+ from core.camera import CameraStream
29
+
30
+
31
def draw_path_zone(frame, obstacles, slope_dir, slope_angle):
    """Overlay the suggested walking corridor and slope arrow on `frame` (in place).

    Obstacles whose box reaches below mid-height count as blocking. With none
    blocking, a green corridor is drawn; otherwise the free side (left first,
    then right) is highlighted, or a STOP banner when both sides are blocked.
    """
    height, width = frame.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX
    green = (0, 255, 0)
    dark_green = (0, 120, 0)
    canvas = frame.copy()

    # Corridor: centre half of the width, lower half of the height.
    lane_l, lane_r = width // 4, 3 * width // 4
    lane_top, lane_bottom = height // 2, height
    mid_x = width // 2

    def blend(layer_weight, frame_weight):
        # Merge the painted copy back into the live frame.
        cv2.addWeighted(canvas, layer_weight, frame, frame_weight, 0, frame)

    in_the_way = []
    for candidate in obstacles:
        _, _, _, box_bottom = candidate['box']
        if box_bottom > lane_top:
            in_the_way.append(candidate)

    if len(in_the_way) == 0:
        corridor = np.array([
            [lane_l + 30, lane_top],
            [lane_r - 30, lane_top],
            [lane_r + 20, lane_bottom],
            [lane_l - 20, lane_bottom],
        ])
        cv2.fillPoly(canvas, [corridor], dark_green)
        blend(0.2, 0.8)
        cv2.polylines(frame, [corridor], True, green, 2)
        cv2.putText(frame, "CLEAR PATH", (mid_x - 60, lane_top + 30), font, 0.6, green, 2)
    else:
        left_free = all(o['box'][0] >= mid_x for o in in_the_way)
        right_free = all(o['box'][2] <= mid_x for o in in_the_way)

        if left_free:
            lane = np.array([[10, lane_top], [width // 3, lane_top],
                             [width // 3 + 20, lane_bottom], [10, lane_bottom]])
            cv2.fillPoly(canvas, [lane], dark_green)
            blend(0.25, 0.75)
            cv2.arrowedLine(frame, (mid_x, height // 2), (width // 6, height // 2),
                            green, 3, tipLength=0.3)
            cv2.putText(frame, "GO LEFT", (20, lane_top + 30), font, 0.7, green, 2)
        elif right_free:
            lane = np.array([[2 * width // 3, lane_top], [width - 10, lane_top],
                             [width - 10, lane_bottom], [2 * width // 3 - 20, lane_bottom]])
            cv2.fillPoly(canvas, [lane], dark_green)
            blend(0.25, 0.75)
            cv2.arrowedLine(frame, (mid_x, height // 2), (5 * width // 6, height // 2),
                            green, 3, tipLength=0.3)
            cv2.putText(frame, "GO RIGHT", (2 * width // 3, lane_top + 30), font, 0.7, green, 2)
        else:
            # Nothing free on either side: tint the whole lower half red.
            cv2.rectangle(canvas, (0, lane_top), (width, lane_bottom), (0, 0, 150), -1)
            blend(0.3, 0.7)
            cv2.putText(frame, "STOP - PATH BLOCKED", (mid_x - 120, height // 2),
                        font, 0.7, (0, 0, 255), 2)

    # Slope indicator at the bottom centre, only for slopes steeper than 3 degrees.
    if abs(slope_angle) > 3:
        base_x, base_y = mid_x, height - 40
        theta = math.radians(slope_angle)
        tip = (int(base_x + math.cos(theta) * 35), int(base_y - math.sin(theta) * 35))
        cv2.arrowedLine(frame, (base_x, base_y), tip, (0, 255, 255), 3, tipLength=0.3)
        cv2.putText(frame, f"{slope_dir} {slope_angle:.0f} deg",
                    (base_x - 60, base_y + 20), font, 0.4, (0, 255, 255), 1)
103
+
104
+
105
def draw_obstacles_bold(frame, obstacles):
    """Paint each obstacle on `frame` (in place): tinted fill, heavy outline, caption, arrow.

    Each obstacle dict is read for 'box', 'proximity', 'label', 'dist',
    'center', 'direction' and, when present, 'track_id'.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)
    red = (0, 0, 255)

    for item in obstacles:
        left, top, right, bottom = item['box']
        urgency = item['proximity']
        is_near = urgency > 0.7

        # (outline colour, caption background) per proximity band, BGR.
        if is_near:
            outline, caption_bg = red, (0, 0, 200)
        elif urgency > 0.4:
            outline, caption_bg = (0, 180, 255), (0, 140, 200)
        else:
            outline, caption_bg = (0, 200, 0), (0, 160, 0)

        # Tinted fill, stronger for near obstacles.
        tint = frame.copy()
        cv2.rectangle(tint, (left, top), (right, bottom), outline, -1)
        opacity = 0.35 if is_near else 0.2
        cv2.addWeighted(tint, opacity, frame, 1 - opacity, 0, frame)

        cv2.rectangle(frame, (left, top), (right, bottom), outline, 3)

        # Caption, prefixed with the track id when one is present.
        caption = f"{item['label'].upper()} {item['dist']}"
        if 'track_id' in item:
            caption = f"#{item['track_id']} {caption}"
        (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.6, 2)
        cv2.rectangle(frame, (left, top - text_h - 10), (left + text_w + 6, top), caption_bg, -1)
        cv2.putText(frame, caption, (left + 3, top - 5), font, 0.6, white, 2)

        # Arrow through the obstacle centre, chosen by its 'direction' value.
        mid_x, mid_y = item['center']
        heading = item['direction']
        if heading == "LEFT":
            cv2.arrowedLine(frame, (mid_x + 50, mid_y), (mid_x - 50, mid_y), white, 3, tipLength=0.4)
        elif heading == "RIGHT":
            cv2.arrowedLine(frame, (mid_x - 50, mid_y), (mid_x + 50, mid_y), white, 3, tipLength=0.4)
        else:
            cv2.arrowedLine(frame, (mid_x, mid_y - 40), (mid_x, mid_y + 40), red, 3, tipLength=0.4)

        if is_near:
            cv2.putText(frame, "!! CLOSE !!", (mid_x - 40, bottom + 20), font, 0.5, red, 2)
153
+
154
+
155
def draw_hud(frame, risk_dict, guidance, fps_val):
    """Render the heads-up display on `frame` (in place).

    Top bar: risk badge, terrain/slope, gait guidance and FPS.
    Bottom bar: the current voice message, cut to 60 characters.
    """
    height, width = frame.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)

    badge_colors = {
        'SAFE': (0, 180, 0),
        'LOW': (0, 220, 0),
        'MEDIUM': (0, 180, 220),
        'HIGH': (0, 0, 220),
    }

    # Black strip behind the top read-outs.
    cv2.rectangle(frame, (0, 0), (width, 80), (0, 0, 0), -1)

    level = risk_dict['risk']
    badge = badge_colors[level]
    cv2.rectangle(frame, (5, 5), (180, 38), badge, -1)
    cv2.putText(frame, f"RISK: {level} ({risk_dict['score']})", (10, 30), font, 0.6, white, 2)

    slope_text = f"Slope: {risk_dict['terrain']} {risk_dict['terrain_slope']:.0f} deg"
    cv2.putText(frame, slope_text, (190, 30), font, 0.5, (200, 200, 200), 1)

    advice = f"Step: {guidance['step']} | Lean: {guidance['lean']} | Knee: {guidance['knee_rec']} deg"
    cv2.putText(frame, advice, (5, 55), font, 0.4, (0, 255, 255), 1)

    cv2.putText(frame, f"{fps_val:.0f}fps", (width - 60, 30), font, 0.4, (100, 100, 100), 1)

    # Voice strip along the bottom edge.
    baseline = height - 20
    cv2.rectangle(frame, (0, height - 45), (width, height), (20, 20, 40), -1)
    cv2.putText(frame, "VOICE:", (8, baseline), font, 0.45, (100, 180, 255), 1)
    cv2.putText(frame, guidance['voice'][:60], (75, baseline), font, 0.4, white, 1)
186
+
187
+
188
def main():
    """Run the live navigation loop on a phone stream or the local webcam.

    Command line:
        (no argument)   use camera index 0
        <digits>        use that local camera index
        http(s)://...   IP-Webcam style URL; "/video" is appended when missing
        --help / -h     print the module usage text and exit

    The camera, OpenCV windows and TTS engine are released even when the loop
    is interrupted (Ctrl+C) or a pipeline stage raises.
    """
    args = sys.argv[1:]
    if args and args[0] in ("--help", "-h"):
        print(__doc__)
        return

    # Parse camera source
    if args:
        source = args[0]
        if source.isdigit():
            # cv2.VideoCapture needs an int for a device index.
            source = int(source)
        elif source.startswith("http") and not source.endswith("/video"):
            # IP Webcam app URLs
            source = source.rstrip("/") + "/video"
        print(f"Connecting to: {source}")
    else:
        source = 0
        print("Using laptop webcam (pass phone URL as argument)")

    try:
        cam = CameraStream(source)
    except RuntimeError as e:
        print(f"Error: {e}")
        print("\nUsage:")
        print(" python app_mobile.py # laptop webcam")
        print(" python app_mobile.py http://192.168.1.5:8080 # IP Webcam app")
        return

    tts = None
    frame_count = 0
    fps_val = 0.0
    t0 = time.time()

    try:
        # Init components
        guide = GuidanceEngine()
        smoother = SlopeSmoother()
        tracker = ObstacleTracker()
        tts = TTSEngine(enabled=True)
        cached_depth = None

        print(f"\n{'='*50}")
        print("LIVE NAVIGATION ASSISTANT")
        print(f"Camera: {cam.w}x{cam.h} @ {cam.native_fps:.0f}fps")
        print("Press 'q' to quit")
        print(f"{'='*50}\n")

        while cam.is_open:
            frame = cam.read()
            if frame is None:
                # No frame yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            h, w = frame.shape[:2]
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect + track
            obstacles = detector.detect(frame, track=True)
            new_obs, closing_obs, _ = tracker.update(obstacles)

            # Pose
            gait, _landmarks, foot_y = pose.analyze(rgb, w, h)

            # Depth on the first frame and then every DEFAULT_DEPTH_EVERY-th frame
            frame_count += 1
            if frame_count % DEFAULT_DEPTH_EVERY == 0 or cached_depth is None:
                cached_depth = depth.estimate_depth(rgb, h, w)
                raw_s, raw_d, raw_t, _ = depth.estimate_slope(cached_depth, h, w, foot_y)
                smoother.update(raw_s, raw_d, raw_t)

            # Risk + guidance
            risk_dict = risk_engine.assess(
                smoother.angle, smoother.direction, gait, obstacles,
                slope_trend=smoother.trend, new_obstacles=new_obs, closing_obstacles=closing_obs)
            guidance = guide.compute(
                smoother.angle, smoother.direction, smoother.terrain, obstacles,
                slope_trend=smoother.trend, new_obstacles=new_obs, closing_obstacles=closing_obs)

            # Render
            out = frame.copy()
            draw_path_zone(out, obstacles, smoother.direction, smoother.angle)
            draw_obstacles_bold(out, obstacles)
            draw_hud(out, risk_dict, guidance, fps_val)

            # TTS
            if tts.enabled and guide.should_speak(guidance['voice'], smoother.angle):
                tts.speak(guidance['voice'])

            # FPS: running average since start, refreshed every 10 frames
            if frame_count % 10 == 0:
                fps_val = frame_count / (time.time() - t0)

            cv2.imshow("Navigation Assistant", out)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to stop; fall through to cleanup.
        print("\nInterrupted.")
    finally:
        cam.release()
        cv2.destroyAllWindows()
        if tts is not None:
            tts.shutdown()

    print(f"\nSession: {frame_count} frames in {time.time() - t0:.1f}s ({fps_val:.1f} fps)")
281
+
282
+
283
+ if __name__ == "__main__":
284
+ main()
app_new.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio UI — image analysis, video processing, and live webcam streaming."""
2
+ import gradio as gr
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image as PILImage
6
+ from pipeline import process_image, process_video
7
+ from core import detector, depth, pose, risk_engine
8
+ from core.depth import SlopeSmoother
9
+ from core.detector import ObstacleTracker
10
+ from core.guidance import GuidanceEngine
11
+ from renderers import overlay as overlay_renderer
12
+
13
+ # ─── Image handler ───
14
+
15
def handle_image(input_image):
    """Analyse one uploaded image; return (annotated PIL image, Markdown report).

    Returns ``(None, "Upload an image.")`` when no image was supplied.
    """
    if input_image is None:
        return None, "Upload an image."

    bgr = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
    rendered, risk_dict, guidance = process_image(bgr, mode="overlay")

    # Build the report as pieces and join once at the end.
    pieces = [
        f"## Risk: **{risk_dict['risk']}** (Score: {risk_dict['score']})\n\n",
        f"**Terrain:** {risk_dict['terrain']} ({risk_dict['terrain_slope']:.1f}°)\n\n",
    ]

    gait_summary = risk_dict.get('gait_summary', {})
    if gait_summary:
        pieces.append("| Metric | Value |\n|---|---|\n")
        pieces.extend(
            f"| {metric.title()} | {value:.1f}° |\n"
            for metric, value in gait_summary.items()
        )

    pieces.append("\n**Risk Factors:**\n")
    reasons = risk_dict['reasons']
    if reasons:
        pieces.append("\n".join(f"- ⚠️ {reason}" for reason in reasons))
    else:
        pieces.append("- ✅ None")
    pieces.append(f"\n\n**Voice:** {guidance['voice']}")

    annotated = PILImage.fromarray(cv2.cvtColor(rendered, cv2.COLOR_BGR2RGB))
    return annotated, "".join(pieces)
33
+
34
+
35
+ # ─── Video handler ───
36
+
37
def handle_video(video, mode):
    """Pass an uploaded video to ``process_video`` in the chosen render mode; None when nothing was uploaded."""
    return None if video is None else process_video(video, mode=mode)
41
+
42
+
43
+ # ─── Live webcam handler (Gradio streaming) ───
44
+
45
+ # Persistent state for live stream
46
+ _live_state = {
47
+ 'guide': None,
48
+ 'smoother': None,
49
+ 'tracker': None,
50
+ 'depth': None,
51
+ 'depth_mini': None,
52
+ 'counter': 0,
53
+ }
54
+
55
+
56
+ def _reset_live():
57
+ _live_state['guide'] = GuidanceEngine()
58
+ _live_state['smoother'] = SlopeSmoother()
59
+ _live_state['tracker'] = ObstacleTracker()
60
+ _live_state['depth'] = None
61
+ _live_state['depth_mini'] = None
62
+ _live_state['counter'] = 0
63
+
64
+
65
def handle_webcam_frame(frame):
    """Process a single webcam frame from Gradio's streaming input.

    Args:
        frame: RGB image delivered by the streaming ``gr.Image`` (anything
            ``np.array`` accepts), or None when no frame is available.

    Returns:
        ``(image, status)``: the rendered overlay as a PIL image and a one-line
        Markdown status, or ``(None, "")`` when `frame` is None.
    """
    if frame is None:
        return None, ""

    # Init state on first frame
    if _live_state['guide'] is None:
        _reset_live()

    # Convert once; the BGR copy is derived from the same array.
    rgb = np.array(frame)
    img_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    h, w = img_bgr.shape[:2]

    obstacles = detector.detect(img_bgr, track=True)
    new_obs, closing_obs, _ = _live_state['tracker'].update(obstacles)
    gait, landmarks, foot_y = pose.analyze(rgb, w, h)

    # Depth, slope and the depth thumbnail are refreshed on the first frame and
    # then every 5th frame; cached values are reused in between.
    _live_state['counter'] += 1
    if _live_state['counter'] % 5 == 0 or _live_state['depth'] is None:
        _live_state['depth'] = depth.estimate_depth(rgb, h, w)
        raw_s, raw_d, raw_t, _ = depth.estimate_slope(_live_state['depth'], h, w, foot_y)
        _live_state['smoother'].update(raw_s, raw_d, raw_t)
        _live_state['depth_mini'] = overlay_renderer.render_depth_mini(_live_state['depth'], w, h)

    sm = _live_state['smoother']
    # Same call shape as the other live loops (app_live.py / app_mobile.py):
    # the obstacle list itself plus trend and tracker context, not just a count.
    # NOTE(review): assumes risk_engine.assess takes the obstacle list in this
    # position, as those callers do; confirm against core/risk_engine.py.
    risk_dict = risk_engine.assess(
        sm.angle, sm.direction, gait, obstacles,
        slope_trend=sm.trend, new_obstacles=new_obs, closing_obstacles=closing_obs)
    guidance = _live_state['guide'].compute(
        sm.angle, sm.direction, sm.terrain, obstacles,
        slope_trend=sm.trend, new_obstacles=new_obs, closing_obstacles=closing_obs)

    rendered = overlay_renderer.render(
        img_bgr, obstacles, gait, landmarks, risk_dict, guidance, _live_state['depth_mini'])

    status = (f"**{risk_dict['risk']}** | Slope: {sm.direction} {sm.angle:.0f}° "
              f"[{sm.trend}] | Obs: {len(obstacles)} | {guidance['voice'][:80]}")

    return PILImage.fromarray(cv2.cvtColor(rendered, cv2.COLOR_BGR2RGB)), status
102
+
103
+
104
+ # ─── Build UI ───
105
+
106
def _on_reset_click():
    """Reset the live-stream state and return one value per output component.

    Returns exactly two values for (cam_out, live_status): a cleared image and
    the "Reset." message.
    """
    _reset_live()
    return None, "Reset."


with gr.Blocks(title="Navigation Assist") as demo:
    gr.Markdown("# 🦯 Vision-Based Navigation Assistance")

    with gr.Tab("📷 Image"):
        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Image")
            img_out = gr.Image(label="Analysis")
        report = gr.Markdown()
        gr.Button("🔍 Analyze", variant="primary").click(
            fn=handle_image, inputs=img_in, outputs=[img_out, report])

    with gr.Tab("🎥 Video"):
        with gr.Row():
            vid_in = gr.Video(label="Upload Video")
            vid_out = gr.Video(label="Output")
        vid_mode = gr.Radio(["overlay", "blind_nav"], value="overlay", label="Render Mode")
        gr.Button("🔍 Process", variant="primary").click(
            fn=handle_video, inputs=[vid_in, vid_mode], outputs=vid_out)

    with gr.Tab("📹 Live Camera"):
        gr.Markdown("Enable your webcam below. Each frame is processed in real-time.")
        with gr.Row():
            cam_in = gr.Image(sources=["webcam"], streaming=True, label="Webcam")
            cam_out = gr.Image(label="Live Analysis")
        live_status = gr.Markdown("Waiting for camera...")
        cam_in.stream(fn=handle_webcam_frame, inputs=cam_in, outputs=[cam_out, live_status])
        # The handler must return as many values as there are outputs (two).
        gr.Button("🔄 Reset State").click(fn=_on_reset_click,
                                         outputs=[cam_out, live_status])

    gr.Markdown("---\n**Models:** YOLOv8n · BlazePose · Depth Anything · Rule-based risk fusion")

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
benchmark.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Benchmark each pipeline component to identify bottlenecks.
2
+
3
+ Usage: python benchmark.py [image_path] [--depth-model small|base|v2-small|v2-base] [--size 256|384|512]
4
+ """
5
+ import cv2
6
+ import time
7
+ import sys
8
+ import os
9
+ import numpy as np
10
+
11
+
12
def bench(label, fn, runs=3):
    """Call `fn` `runs` times and print its average latency.

    Args:
        label: Name shown in the printed report line.
        fn: Zero-argument callable to time.
        runs: Number of timed calls; must be at least 1. The first call is
            included in the average, so one-time warm-up cost counts.

    Returns:
        ``(result, avg)``: the value returned by the last call and the mean
        duration of a call in seconds.

    Raises:
        ValueError: if `runs` is less than 1.
    """
    if runs < 1:
        raise ValueError(f"runs must be >= 1, got {runs}")

    durations = []
    result = None
    for _ in range(runs):
        # perf_counter is the monotonic, high-resolution clock meant for
        # measuring short intervals.
        start = time.perf_counter()
        result = fn()
        durations.append(time.perf_counter() - start)

    avg = sum(durations) / len(durations)
    fps = 1.0 / avg if avg > 0 else 999  # sentinel when the call is too fast to measure
    print(f" {label:30s} {avg*1000:7.1f}ms ({fps:.1f} fps)")
    return result, avg
23
+
24
+
25
def _flag_value(argv, flag):
    """Return the argument following `flag` in `argv`, or None when the flag is absent.

    Exits with a usage error when the flag is the last argument.
    """
    if flag not in argv:
        return None
    idx = argv.index(flag)
    if idx + 1 >= len(argv):
        sys.exit(f"{flag} requires a value")
    return argv[idx + 1]


def main():
    """Time each pipeline stage on one image and print a latency breakdown.

    The image is the last existing non-flag path on the command line, or
    otherwise the first suitable .jpg in the default test directory.
    ``--depth-model`` and ``--size`` are exported as NAV_DEPTH_MODEL and
    NAV_DEPTH_INPUT_SIZE before ``config`` is imported, so they take effect.
    """
    # Parse args
    img_path = None
    for a in sys.argv[1:]:
        if not a.startswith("--") and os.path.exists(a):
            img_path = a

    depth_model = _flag_value(sys.argv, "--depth-model")
    if depth_model is not None:
        os.environ["NAV_DEPTH_MODEL"] = depth_model

    size = _flag_value(sys.argv, "--size")
    if size is not None:
        os.environ["NAV_DEPTH_INPUT_SIZE"] = size

    # Use the given image or fall back to one from the test directory
    if img_path is None:
        test_dir = "/mnt/c/Visual/test_images"
        candidates = []
        if os.path.isdir(test_dir):
            # Sorted so the chosen fallback image is the same on every run.
            candidates = sorted(
                f for f in os.listdir(test_dir)
                if f.endswith('.jpg') and '_pose' not in f)
        if not candidates:
            print("No test image found. Pass an image path.")
            return
        img_path = os.path.join(test_dir, candidates[0])

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising on unreadable files.
        print(f"Could not read image: {img_path}")
        return

    h, w = img.shape[:2]
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Imported here, after the environment overrides above have been applied.
    from config import DEPTH_MODEL, DEPTH_INPUT_SIZE, DEVICE
    print(f"Image: {w}x{h}")
    print(f"Depth model: {DEPTH_MODEL} input_size: {DEPTH_INPUT_SIZE} device: {DEVICE}")
    print(f"{'='*60}")

    # Warm up + benchmark each component
    from core import detector, depth, pose, risk_engine

    print("\n[1] YOLO Obstacle Detection")
    obstacles, t_yolo = bench("yolo detect", lambda: detector.detect(img))

    print("\n[2] YOLO + ByteTrack")
    _, _t_track = bench("yolo track", lambda: detector.detect(img, track=True))

    print("\n[3] BlazePose Gait Analysis")
    pose_result, t_pose = bench("pose analyze", lambda: pose.analyze(rgb, w, h))
    gait, _landmarks, foot_y = pose_result

    print("\n[4] Depth Estimation")
    depth_norm, t_depth = bench("depth estimate", lambda: depth.estimate_depth(rgb, h, w))

    print("\n[5] Slope Analysis (on cached depth)")
    _, t_slope = bench("slope estimate", lambda: depth.estimate_slope(depth_norm, h, w, foot_y))

    print("\n[6] Risk Assessment")
    # Pass the obstacle list, as the live loops do, rather than a count.
    # NOTE(review): confirm against core/risk_engine.py.
    _, t_risk = bench("risk assess", lambda: risk_engine.assess(0.0, "FLAT", gait, obstacles))

    print(f"\n{'='*60}")
    # The tracked-detection timing is reported above but not added to the
    # total, which already counts plain detection.
    total = t_yolo + t_pose + t_depth + t_slope + t_risk
    print(f" {'TOTAL (per frame)':30s} {total*1000:7.1f}ms ({1.0/total:.1f} fps)")
    print(f" {'Without depth':30s} {(total-t_depth)*1000:7.1f}ms ({1.0/(total-t_depth):.1f} fps)")
    print(f"\nDepth is {t_depth/total*100:.0f}% of total latency.")

    if t_depth > 0.15:
        print("\nTips to speed up depth:")
        print(f" - Current input size: {DEPTH_INPUT_SIZE}. Try: NAV_DEPTH_INPUT_SIZE=256")
        print(f" - Current model: {DEPTH_MODEL}. 'small' is fastest.")
        if DEVICE == "cpu":
            print(" - Running on CPU. Set NAV_DEVICE=cuda if GPU available.")
        print(" - Export to ONNX: set NAV_DEPTH_ONNX=/path/to/model.onnx")
95
+
96
+
97
+ if __name__ == "__main__":
98
+ main()
config.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Centralized configuration for the navigation system.

Settings read from the environment (all optional):
    NAV_YOLO_MODEL, NAV_POSE_MODEL, NAV_DEPTH_MODEL, NAV_DEPTH_INPUT_SIZE,
    NAV_DEVICE, NAV_RISK_PROFILE
"""
import os

# Model paths
YOLO_MODEL = os.environ.get("NAV_YOLO_MODEL", "yolov8n.pt")

# Pose model default: prefer a copy sitting next to this file, so a fresh
# checkout works on any machine; otherwise keep the original absolute path.
_POSE_MODEL_NAME = "pose_landmarker_heavy.task"
_POSE_MODEL_LOCAL = os.path.join(os.path.dirname(os.path.abspath(__file__)), _POSE_MODEL_NAME)
_POSE_MODEL_LEGACY = "/mnt/c/Visual/pose_landmarker_heavy.task"
POSE_MODEL = os.environ.get(
    "NAV_POSE_MODEL",
    _POSE_MODEL_LOCAL if os.path.exists(_POSE_MODEL_LOCAL) else _POSE_MODEL_LEGACY,
)

# Depth model options: "small", "base", "v2-small", "v2-base"
# v2 models are more accurate. "small" variants are faster.
DEPTH_MODEL = os.environ.get("NAV_DEPTH_MODEL", "small")
DEPTH_INPUT_SIZE = int(os.environ.get("NAV_DEPTH_INPUT_SIZE", 384))  # resize before inference
DEVICE = os.environ.get("NAV_DEVICE", "cpu")

# Auto-detect GPU if not explicitly set
if "NAV_DEVICE" not in os.environ:
    try:
        import torch
        if torch.cuda.is_available():
            DEVICE = "cuda"
    except ImportError:
        # torch not installed: stay on the "cpu" default.
        pass

# Detection
YOLO_CONF = 0.35
# class id -> label for the detections treated as obstacles
OBSTACLE_CLASSES = {
    0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 5: 'bus', 7: 'truck',
    9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 13: 'bench',
    15: 'cat', 16: 'dog', 24: 'backpack', 25: 'umbrella', 56: 'chair',
    57: 'couch', 58: 'potted plant', 60: 'dining table',
}

# Depth / slope
GROUND_RATIO = 0.55       # default ground region starts at 55% of frame height
SLOPE_MULTIPLIER = 20.0   # arctan scaling — calibrate on known slopes
SLOPE_CLAMP = 35.0        # max slope angle (degrees)
SLOPE_DEADZONE = 3.0      # angles below this → FLAT
SLOPE_SMOOTHING = 0.7     # EMA alpha for temporal smoothing (0=no smoothing, 1=full cache)

# Terrain roughness thresholds (std of ground depth)
TERRAIN_ROCKY_THRESH = 0.25
TERRAIN_ROUGH_THRESH = 0.15

# Pose landmark indices
LM = {
    'L_SHOULDER': 11, 'R_SHOULDER': 12,
    'L_HIP': 23, 'R_HIP': 24,
    'L_KNEE': 25, 'R_KNEE': 26,
    'L_ANKLE': 27, 'R_ANKLE': 28,
    'L_HEEL': 29, 'R_HEEL': 30,
    'L_FOOT': 31, 'R_FOOT': 32,
}
# Pairs of landmark indices joined when drawing the skeleton
SKELETON_CONNS = [
    (11, 13), (13, 15), (12, 14), (14, 16), (11, 12), (11, 23), (12, 24), (23, 24),
    (23, 25), (25, 27), (27, 29), (27, 31), (29, 31),
    (24, 26), (26, 28), (28, 30), (28, 32), (30, 32),
]

# Risk thresholds
RISK_HIGH = 60
RISK_MEDIUM = 30
RISK_LOW = 10

# User mobility profiles: "default", "elderly", "athletic"
RISK_PROFILE = os.environ.get("NAV_RISK_PROFILE", "default")

# Voice throttle
VOICE_COOLDOWN_SEC = 2.5  # min seconds between repeated messages
VOICE_SLOPE_DELTA = 5.0   # slope must change by this much to re-announce

# Video processing
DEFAULT_SKIP_FRAMES = 2
DEFAULT_DEPTH_EVERY = 5
core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
core/camera.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Live camera capture with threaded frame reading for consistent FPS.
2
+
3
+ Supports:
4
+ - USB webcam: CameraStream(0)
5
+ - RTSP/IP cam: CameraStream("rtsp://user:pass@192.168.1.100:554/stream")
6
+ - Video file (treated as stream): CameraStream("/path/to/video.mp4")
7
+ """
8
+ import cv2
9
+ import threading
10
+ import time
11
+
12
+
13
class CameraStream:
    """Threaded camera reader that always holds the most recent frame.

    A daemon thread keeps calling ``VideoCapture.read`` and stores the result,
    so ``read()`` only copies the latest frame and never waits on the device.

    Args:
        source: anything ``cv2.VideoCapture`` accepts (device index, RTSP URL,
            or file path).
        target_fps: stored on the instance; not used by this class itself.

    Raises:
        RuntimeError: if the source cannot be opened or yields no first frame.
    """

    def __init__(self, source=0, target_fps=15):
        self.source = source
        self.target_fps = target_fps
        self._cap = cv2.VideoCapture(source)

        if not self._cap.isOpened():
            raise RuntimeError(f"Cannot open camera: {source}")

        # Read one frame up front so `frame` is never None after construction.
        ret, frame = self._cap.read()
        if not ret:
            # Do not leak the capture handle when the first read fails.
            self._cap.release()
            raise RuntimeError(f"Cannot read from camera: {source}")

        self.frame = frame
        self.w = int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.h = int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Some sources report 0 fps; fall back to 30.
        self.native_fps = self._cap.get(cv2.CAP_PROP_FPS) or 30.0

        self._lock = threading.Lock()
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._reader, daemon=True)
        self._thread.start()

        print(f"[Camera] Opened {source} ({self.w}x{self.h} @ {self.native_fps:.0f}fps)", flush=True)

    def _reader(self):
        """Continuously grab frames in the background until stopped."""
        while not self._stop.is_set():
            ret, frame = self._cap.read()
            if ret:
                with self._lock:
                    self.frame = frame
                continue

            # Read failed: RTSP sources get a reconnect attempt, others end.
            if isinstance(self.source, str) and self.source.startswith("rtsp"):
                print("[Camera] Lost connection, reconnecting...", flush=True)
                # Event.wait instead of sleep so release() is not held up.
                if self._stop.wait(2):
                    break
                self._cap.release()
                self._cap = cv2.VideoCapture(self.source)
                continue

            # End of file / unplugged device: mark the stream closed so
            # `is_open` turns False instead of serving a stale frame forever.
            self._stop.set()
            break

    def read(self):
        """Return a copy of the latest frame (or None). Never blocks on I/O."""
        with self._lock:
            return self.frame.copy() if self.frame is not None else None

    @property
    def is_open(self):
        """True while the reader has not stopped and the capture is open."""
        return not self._stop.is_set() and self._cap.isOpened()

    def release(self):
        """Stop the reader thread and release the capture device."""
        self._stop.set()
        self._thread.join(timeout=3)
        self._cap.release()
core/depth.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Monocular depth estimation with model selection, resolution scaling, and temporal smoothing.
2
+
3
+ Supported models (set via NAV_DEPTH_MODEL env or config.py):
4
+ "small" — Depth Anything V1 Small (fastest)
5
+ "base" — Depth Anything V1 Base
6
+ "v2-small" — Depth Anything V2 Small (recommended for MVP)
7
+ "v2-base" — Depth Anything V2 Base (best accuracy)
8
+
9
+ Performance knobs:
10
+ NAV_DEPTH_INPUT_SIZE — resize input before inference (default 384, try 256 for speed)
11
+ NAV_DEVICE — "cpu" or "cuda"
12
+ """
13
+ import cv2
14
+ import numpy as np
15
+ import math
16
+ import time
17
+ from config import (
18
+ DEPTH_MODEL, DEPTH_INPUT_SIZE, DEVICE, GROUND_RATIO, SLOPE_MULTIPLIER,
19
+ SLOPE_CLAMP, SLOPE_DEADZONE,
20
+ TERRAIN_ROCKY_THRESH, TERRAIN_ROUGH_THRESH,
21
+ )
22
+
23
+ _MODEL_MAP = {
24
+ "small": "LiheYoung/depth-anything-small-hf",
25
+ "base": "LiheYoung/depth-anything-base-hf",
26
+ "v2-small": "depth-anything/Depth-Anything-V2-Small-hf",
27
+ "v2-base": "depth-anything/Depth-Anything-V2-Base-hf",
28
+ }
29
+
30
+ _depth_pipe = None
31
+ _onnx_session = None
32
+ _backend = None # "hf" or "onnx"
33
+
34
+
35
def _load():
    """Initialise the depth backend once (ONNX if configured, else HuggingFace).

    Subsequent calls are no-ops. Sets the module globals ``_backend`` and,
    for the HuggingFace path, ``_depth_pipe``.
    """
    global _depth_pipe, _backend
    # Guard on _backend, not _depth_pipe: the ONNX path never sets
    # _depth_pipe, so checking it rebuilt the ONNX session on every call.
    if _backend is not None:
        return

    # Unknown names are passed through so a raw model id can be configured.
    model_id = _MODEL_MAP.get(DEPTH_MODEL, DEPTH_MODEL)

    # Try ONNX first if available
    if _try_load_onnx(model_id):
        return

    # Fall back to HuggingFace pipeline (imported lazily; it is heavy).
    from transformers import pipeline as hf_pipeline
    t0 = time.time()
    _depth_pipe = hf_pipeline("depth-estimation", model=model_id, device=DEVICE)
    _backend = "hf"
    print(f"[Depth] Loaded {model_id} on {DEVICE} ({time.time()-t0:.1f}s)", flush=True)
52
+
53
+
54
def _try_load_onnx(model_id):
    """Attempt to create an ONNX Runtime session from ``NAV_DEPTH_ONNX``.

    ``model_id`` is accepted but not used; the model file comes solely from
    the environment variable. Returns True when a session was created and
    stored in ``_onnx_session``, False when onnxruntime is not importable,
    the variable is unset/empty, or the file does not exist.
    """
    global _onnx_session, _backend
    try:
        import onnxruntime as ort
        import os

        # Look for local ONNX file
        model_file = os.environ.get("NAV_DEPTH_ONNX")
        if not model_file:
            return False
        if not os.path.exists(model_file):
            print(f"[Depth] ONNX path not found: {model_file}", flush=True)
            return False

        if DEVICE == "cuda":
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
            providers = ['CPUExecutionProvider']

        started = time.time()
        _onnx_session = ort.InferenceSession(model_file, providers=providers)
        _backend = "onnx"
        print(f"[Depth] Loaded ONNX model ({time.time()-started:.1f}s) providers={_onnx_session.get_providers()}", flush=True)
        return True
    except ImportError:
        return False
76
+
77
+
78
def _infer_hf(rgb_small):
    """Feed an RGB array through the HuggingFace pipeline; return float32 depth."""
    from PIL import Image as PILImage

    pil_frame = PILImage.fromarray(rgb_small)
    prediction = _depth_pipe(pil_frame)
    return np.array(prediction["depth"]).astype(np.float32)
82
+
83
+
84
def _infer_onnx(rgb_small):
    """Normalise an HxWx3 RGB array and run it through the ONNX session.

    Returns the first session output, squeezed and cast to float32.
    """
    # Per-channel mean/std applied after scaling pixel values to [0, 1].
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    scaled = rgb_small.astype(np.float32) / 255.0
    normalized = (scaled - channel_mean) / channel_std
    batch = np.transpose(normalized, (2, 0, 1))[np.newaxis]  # NCHW

    feed_name = _onnx_session.get_inputs()[0].name
    outputs = _onnx_session.run(None, {feed_name: batch})
    return outputs[0].squeeze().astype(np.float32)
96
+
97
+
98
def estimate_depth(frame_rgb, h, w):
    """Run the depth model and return a min-max normalised depth map.

    Args:
        frame_rgb: RGB frame as a NumPy array.
        h, w: frame height and width; the result is resized to (h, w).

    Returns:
        Array of shape (h, w) scaled to the range 0-1. An all-zero map is
        returned when the raw prediction is (numerically) constant.
    """
    _load()

    # Resize for faster inference (longest side capped at DEPTH_INPUT_SIZE).
    inp_size = DEPTH_INPUT_SIZE
    if h > inp_size or w > inp_size:
        scale = inp_size / max(h, w)
        # Clamp to at least 1 px: with an extreme aspect ratio the short side
        # would otherwise truncate to 0 and cv2.resize would raise.
        sh, sw = max(1, int(h * scale)), max(1, int(w * scale))
        rgb_small = cv2.resize(frame_rgb, (sw, sh))
    else:
        rgb_small = frame_rgb

    # Inference
    if _backend == "onnx":
        # ONNX needs exact square input for some exports
        sq = cv2.resize(rgb_small, (inp_size, inp_size))
        dm = _infer_onnx(sq)
    else:
        dm = _infer_hf(rgb_small)

    # Resize back to original resolution
    dm = cv2.resize(dm, (w, h))

    dmin, dmax = dm.min(), dm.max()
    # Avoid dividing by ~0 when the prediction has no dynamic range.
    if dmax - dmin < 1e-8:
        return np.zeros((h, w), dtype=np.float32)
    return (dm - dmin) / (dmax - dmin)
126
+
127
+
128
def estimate_slope(depth_norm, h, w, foot_y=None):
    """Estimate slope angle, direction and roughness from the ground region.

    The ground region starts slightly above ``foot_y`` when it is given and
    lies below 30% of the frame height; otherwise it starts at
    ``GROUND_RATIO`` of the frame height.

    Returns (slope_angle, slope_dir, terrain_type, ground_start_y).
    """
    use_feet = bool(foot_y) and foot_y > h * 0.3
    if use_feet:
        ground_start = max(0, int(foot_y) - int(h * 0.1))
    else:
        ground_start = int(h * GROUND_RATIO)

    ground = depth_norm[ground_start:, :]
    g_rows, g_cols = ground.shape
    if g_rows < 10 or g_cols < 10:
        return 0.0, "FLAT", "SMOOTH", ground_start

    # Vertical depth gradient; only the outer thirds of the width are sampled.
    grad_y = cv2.Sobel(ground, cv2.CV_64F, 0, 1, ksize=5)
    left_end = g_cols // 3
    right_start = 2 * g_cols // 3

    band_h = max(1, g_rows // 6)
    band_medians = []
    for band in range(6):
        top = band * band_h
        bottom = min(top + band_h, g_rows)
        left = grad_y[top:bottom, :left_end].flatten()
        right = grad_y[top:bottom, right_start:].flatten()
        samples = np.concatenate([left, right])
        if len(samples):
            band_medians.append(float(np.median(samples)))

    if len(band_medians) < 3:
        return 0.0, "FLAT", "SMOOTH", ground_start

    # Linear trend of the per-band medians, mapped to a clamped angle.
    xs = np.arange(len(band_medians))
    trend = np.polyfit(xs, np.array(band_medians), 1)[0]
    degrees = np.arctan(trend * SLOPE_MULTIPLIER) * 180 / math.pi
    angle = float(np.clip(degrees, -SLOPE_CLAMP, SLOPE_CLAMP))
    if abs(angle) < SLOPE_DEADZONE:
        angle = 0.0

    if abs(angle) < SLOPE_DEADZONE:
        direction = "FLAT"
    elif angle > 0:
        direction = "UPHILL"
    else:
        direction = "DOWNHILL"

    # Roughness from the spread of depth values over the ground region.
    spread = float(np.std(ground))
    if spread > TERRAIN_ROCKY_THRESH:
        terrain = "ROCKY"
    elif spread > TERRAIN_ROUGH_THRESH:
        terrain = "ROUGH"
    else:
        terrain = "SMOOTH"

    return angle, direction, terrain, ground_start
174
+
175
+
176
class SlopeSmoother:
    """Exponential smoothing of per-frame slope estimates with outlier rejection.

    ``alpha`` is the weight given to the previous smoothed angle; a raw angle
    further than ``outlier_thresh`` from the recent median is replaced by
    that median before blending.
    """

    def __init__(self, alpha=0.7, outlier_thresh=15.0):
        self.alpha = alpha
        self.outlier_thresh = outlier_thresh
        self._angle = 0.0
        self._dir = "FLAT"
        self._terrain = "SMOOTH"
        self._history = []       # most recent raw angles, oldest first
        self._max_history = 10

    def update(self, raw_angle, raw_dir, raw_terrain):
        """Add a raw measurement and return the smoothed (angle, dir, terrain).

        ``raw_dir`` is accepted but the direction is re-derived from the
        smoothed angle; terrain is passed through unsmoothed.
        """
        history = self._history
        history.append(raw_angle)
        if len(history) > self._max_history:
            history.pop(0)

        # Outlier rejection against the median of the stored window.
        sample = raw_angle
        if len(history) >= 3:
            window_median = float(np.median(history))
            if abs(sample - window_median) > self.outlier_thresh:
                sample = window_median

        blended = self.alpha * self._angle + (1 - self.alpha) * sample

        if abs(blended) < SLOPE_DEADZONE:
            self._angle = 0.0
            self._dir = "FLAT"
        else:
            self._angle = blended
            self._dir = "UPHILL" if blended > 0 else "DOWNHILL"

        self._terrain = raw_terrain
        return self._angle, self._dir, self._terrain

    @property
    def trend(self):
        """"STEEPENING"/"FLATTENING"/"STABLE" from the last four raw angles."""
        if len(self._history) < 4:
            return "STABLE"
        last_four = self._history[-4:]
        steps = [later - earlier for earlier, later in zip(last_four, last_four[1:])]
        mean_step = sum(steps) / len(steps)
        if mean_step > 2.0:
            return "STEEPENING"
        if mean_step < -2.0:
            return "FLATTENING"
        return "STABLE"

    @property
    def angle(self):
        """Latest smoothed angle."""
        return self._angle

    @property
    def direction(self):
        """Direction derived from the latest smoothed angle."""
        return self._dir

    @property
    def terrain(self):
        """Terrain label from the latest update."""
        return self._terrain
core/detector.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """YOLO obstacle detection with ByteTrack tracking and state management."""
2
+ from config import YOLO_MODEL, YOLO_CONF, OBSTACLE_CLASSES
3
+
4
+ _yolo = None
5
+
6
+
7
def _load():
    """Return the shared YOLO model, creating it on first use."""
    global _yolo
    if _yolo is not None:
        return _yolo
    # Imported lazily so importing this module does not pull in ultralytics.
    from ultralytics import YOLO
    _yolo = YOLO(YOLO_MODEL)
    return _yolo
13
+
14
+
15
def detect(frame, track=False):
    """Detect obstacles in a frame, nearest (largest ``proximity``) first.

    With ``track=True`` the model's ``track`` call is used and each obstacle
    that received an id carries it under ``'track_id'``.
    """
    model = _load()
    frame_h, frame_w = frame.shape[:2]

    if track:
        result = model.track(frame, conf=YOLO_CONF, verbose=False, persist=True)[0]
    else:
        result = model(frame, conf=YOLO_CONF, verbose=False)[0]

    found = []
    for det in result.boxes:
        class_id = int(det.cls[0])
        if class_id not in OBSTACLE_CLASSES:
            continue
        left, top, right, bottom = map(int, det.xyxy[0])

        # Filter out full-frame false positives (box covers >50% of frame area)
        if (right - left) * (bottom - top) > 0.5 * frame_h * frame_w:
            continue

        # Bottom edge relative to frame height is the nearness proxy.
        nearness = bottom / frame_h
        mid_x = (left + right) / 2

        if mid_x < frame_w * 0.33:
            side = "LEFT"
        elif mid_x > frame_w * 0.66:
            side = "RIGHT"
        else:
            side = "CENTER"

        if nearness > 0.7:
            band = "NEAR"
        elif nearness > 0.4:
            band = "MID"
        else:
            band = "FAR"

        entry = {
            'label': OBSTACLE_CLASSES[class_id],
            'conf': float(det.conf[0]),
            'box': (left, top, right, bottom),
            'center': ((left + right) // 2, (top + bottom) // 2),
            'proximity': nearness,
            'dist': band,
            'direction': side,
        }
        if track and det.id is not None:
            entry['track_id'] = int(det.id[0])
        found.append(entry)

    found.sort(key=lambda entry: -entry['proximity'])
    return found
55
+
56
+
57
class ObstacleTracker:
    """Compares tracked obstacles between consecutive frames."""

    def __init__(self):
        self._prev = {}         # track_id -> obstacle dict from the last update
        self._new_ids = set()   # ids present now but not in the last update
        self._lost_ids = set()  # ids present in the last update but not now

    def update(self, obstacles):
        """Update state with this frame's obstacles (from detect(track=True)).

        Obstacles without a 'track_id' are ignored.

        Returns (new_obstacles, closing_obstacles, lost_ids):
        - new_obstacles: obstacles whose id was absent from the previous update
        - closing_obstacles: obstacles whose proximity rose by more than 0.05
          since the previous update; each gets a 'closing_rate' key
        - lost_ids: ids from the previous update that are no longer present
        """
        seen_now = {}
        appeared = []
        approaching = []

        for item in obstacles:
            track_id = item.get('track_id')
            if track_id is None:
                continue
            seen_now[track_id] = item

            if track_id not in self._prev:
                appeared.append(item)
                continue

            # Rising proximity means the object is getting nearer.
            change = item['proximity'] - self._prev[track_id]['proximity']
            if change > 0.05:
                item['closing_rate'] = round(change, 3)
                approaching.append(item)

        previous_ids = set(self._prev.keys())
        current_ids = set(seen_now.keys())
        self._lost_ids = previous_ids - current_ids
        self._new_ids = current_ids - previous_ids
        self._prev = seen_now

        return appeared, approaching, self._lost_ids

    @property
    def active_count(self):
        """Number of tracked obstacles in the most recent update."""
        return len(self._prev)
core/guidance.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Voice and movement guidance with throttling and tracker-aware alerts."""
2
+ import time
3
+ from config import VOICE_COOLDOWN_SEC, VOICE_SLOPE_DELTA
4
+
5
+
6
class GuidanceEngine:
    """Generates movement guidance and throttled voice messages."""

    def __init__(self):
        self._last_voice = ""               # last message approved by should_speak
        self._last_voice_time = 0.0         # time.time() of that approval
        self._last_slope_announced = 0.0    # slope angle at the last slope-triggered approval

    def compute(self, slope_angle, slope_dir, terrain, obstacles,
                slope_trend="STABLE", new_obstacles=None, closing_obstacles=None):
        """Return a guidance dict with movement recommendations and a voice string.

        Args:
            slope_angle: signed slope angle in degrees.
            slope_dir: "UPHILL", "DOWNHILL" or anything else (treated as flat).
            terrain: "ROCKY", "ROUGH" or another label.
            obstacles: list of obstacle dicts with 'proximity', 'direction'
                and 'label' keys.
            slope_trend: "STEEPENING", "FLATTENING" or "STABLE".
            new_obstacles / closing_obstacles: optional lists of obstacle dicts.

        Returns:
            dict with keys knee_rec, step, step_len, foot_adj, lean,
            risk_score, voice, obstacle_warning, slope_trend.
        """
        sa = abs(slope_angle)

        knee_rec = 135 if sa > 20 else 145 if sa > 10 else 155 if sa > 5 else 165

        if sa > 15 or terrain in ("ROCKY", "ROUGH"):
            step, step_len = "SHORT", "30cm"
        elif sa > 5:
            step, step_len = "MEDIUM", "50cm"
        else:
            step, step_len = "NORMAL", "70cm"

        if slope_dir == "UPHILL":
            foot_adj = round(min(25, sa * 0.7), 1)
            lean = "FORWARD"
        elif slope_dir == "DOWNHILL":
            foot_adj = round(-min(20, sa * 0.6), 1)
            lean = "BACKWARD"
        else:
            foot_adj, lean = 0.0, "UPRIGHT"

        near_obs = [o for o in obstacles if o['proximity'] > 0.6]
        risk_score = min(100, int(sa * 1.5 + len(near_obs) * 20 +
                                  (15 if terrain == "ROCKY" else 0)))

        voice = self._build_voice(
            slope_angle, slope_dir, terrain, obstacles, sa, step,
            near_obs, slope_trend, new_obstacles, closing_obstacles)

        return {
            'knee_rec': knee_rec,
            'step': step,
            'step_len': step_len,
            'foot_adj': foot_adj,
            'lean': lean,
            'risk_score': risk_score,
            'voice': voice,
            'obstacle_warning': self._obstacle_warning(near_obs),
            'slope_trend': slope_trend,
        }

    def _build_voice(self, slope_angle, slope_dir, terrain, obstacles, sa, step,
                     near_obs, slope_trend, new_obstacles, closing_obstacles):
        """Assemble the spoken sentence, most urgent parts first.

        ``obstacles`` and ``step`` are accepted but not used here.
        """
        parts = []

        # Priority 1: NEW obstacles entering scene
        if new_obstacles:
            for ob in new_obstacles[:2]:
                parts.append(f"New {ob['label']} on {ob['direction'].lower()}.")

        # Priority 2: CLOSING obstacles (approaching fast)
        if closing_obstacles:
            for ob in closing_obstacles[:2]:
                if ob['direction'] == "CENTER":
                    parts.append(f"{ob['label']} approaching ahead!")
                else:
                    opp = "right" if ob['direction'] == "LEFT" else "left"
                    parts.append(f"{ob['label']} closing from {ob['direction'].lower()}. Move {opp}.")

        # Priority 3: Already-near obstacles (skipped when closing ones were
        # announced, to keep the message short)
        if not closing_obstacles:
            for ob in near_obs[:2]:
                if ob['direction'] == "CENTER":
                    parts.append(f"{ob['label']} ahead! Stop.")
                else:
                    opp = "right" if ob['direction'] == "LEFT" else "left"
                    parts.append(f"{ob['label']} on {ob['direction'].lower()}. Move {opp}.")

        # Priority 4: Slope trend changes
        if slope_trend == "STEEPENING" and sa > 5:
            parts.append("Slope increasing. Slow down.")
        elif slope_trend == "FLATTENING" and sa > 3:
            parts.append("Slope easing.")

        # Priority 5: Current slope guidance
        if sa > 3:
            parts.append(f"Slope {slope_angle:+.0f} degrees.")
            if slope_dir == "UPHILL":
                parts.append("Lean forward." if sa > 10 else "Slight forward lean.")
            else:
                parts.append("Lean back." if sa > 10 else "Slight backward lean.")

        if sa > 15 or terrain in ("ROCKY", "ROUGH"):
            parts.append("Short steps.")
        elif sa > 5:
            parts.append("Medium steps.")

        if terrain == "ROCKY":
            parts.append("Uneven ground.")
        elif terrain == "ROUGH":
            parts.append("Rough surface.")

        if not parts:
            parts.append("Path clear.")

        return " ".join(parts)

    def _obstacle_warning(self, near_obs):
        """Return a short on-screen warning for the first near obstacle, or None."""
        if not near_obs:
            return None
        ob = near_obs[0]
        if ob['direction'] == "CENTER":
            return f"{ob['label']} AHEAD — STOP"
        opp = "RIGHT" if ob['direction'] == "LEFT" else "LEFT"
        return f"{ob['label']} on {ob['direction']} — move {opp}"

    def _mark_spoken(self, voice, now):
        """Record ``voice`` as the most recently approved message."""
        self._last_voice = voice
        self._last_voice_time = now

    def should_speak(self, voice, slope_angle):
        """Throttle: return True if this message should be spoken aloud.

        Rules, in order:
        1. Obstacle-related messages bypass the cooldown, except an exact
           repeat of the last spoken message inside the cooldown window.
           Without that exception the same warning was approved on every
           frame and spoken back-to-back.
        2. A slope change larger than VOICE_SLOPE_DELTA since the last
           slope-triggered announcement is spoken.
        3. Otherwise a message is spoken only when it differs from the last
           one and the cooldown has elapsed.
        """
        now = time.time()
        cooled_down = now - self._last_voice_time > VOICE_COOLDOWN_SEC
        is_repeat = voice == self._last_voice

        lowered = voice.lower()
        urgent = any(kw in lowered for kw in ["ahead", "stop", "new ", "closing", "approaching"])
        if urgent:
            if is_repeat and not cooled_down:
                return False
            self._mark_spoken(voice, now)
            return True

        # Slope changed significantly
        if abs(slope_angle - self._last_slope_announced) > VOICE_SLOPE_DELTA:
            self._last_slope_announced = slope_angle
            self._mark_spoken(voice, now)
            return True

        # Cooldown elapsed and message changed
        if cooled_down and not is_repeat:
            self._mark_spoken(voice, now)
            return True
        return False
core/pose.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """BlazePose gait analysis."""
2
+ import numpy as np
3
+ import math
4
+ from config import LM, POSE_MODEL
5
+
6
+ _pose_lm = None
7
+
8
+
9
def _load():
    """Return the shared MediaPipe pose landmarker, creating it on first use."""
    global _pose_lm
    if _pose_lm is not None:
        return _pose_lm

    import mediapipe as mp

    vision = mp.tasks.vision
    options = vision.PoseLandmarkerOptions(
        base_options=mp.tasks.BaseOptions(model_asset_path=POSE_MODEL),
        running_mode=vision.RunningMode.IMAGE,
        num_poses=1,
        min_pose_detection_confidence=0.5,
    )
    _pose_lm = vision.PoseLandmarker.create_from_options(options)
    return _pose_lm
21
+
22
+
23
+ def _angle(a, b, c):
24
+ ba = np.array(a) - np.array(b)
25
+ bc = np.array(c) - np.array(b)
26
+ cos = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-8)
27
+ return math.degrees(math.acos(np.clip(cos, -1, 1)))
28
+
29
+
30
def _vert(top, bot):
    """Return the signed angle in degrees of segment top->bot from the +y axis."""
    delta_x = bot[0] - top[0]
    delta_y = bot[1] - top[1]
    return math.degrees(math.atan2(delta_x, delta_y))
32
+
33
+
34
def _foot_tilt(heel, toe):
    """Return the heel->toe angle in degrees, with the y difference negated."""
    run = toe[0] - heel[0]
    rise = heel[1] - toe[1]
    # -(toe_y - heel_y) is kept literally so signed zeros match the original.
    rise = -(toe[1] - heel[1])
    return math.degrees(math.atan2(rise, run))
38
+
39
+
40
def analyze(frame_rgb, w, h):
    """Run pose detection on one RGB frame and derive gait metrics.

    Returns (gait_dict, landmarks, foot_y), or (None, None, None) when no
    pose is detected. ``foot_y`` is the largest pixel y among the ankle,
    heel and foot landmarks (indices 27-32).
    """
    import mediapipe as mp

    landmarker = _load()
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)
    detection = landmarker.detect(image)

    if not detection.pose_landmarks:
        return None, None, None

    lms = detection.pose_landmarks[0]
    foot_y = max(lms[idx].y * h for idx in [27, 28, 29, 30, 31, 32])

    joint_names = ['SHOULDER', 'HIP', 'KNEE', 'ANKLE', 'HEEL', 'FOOT']
    gait = {}
    for side in ('L', 'R'):
        # Landmark coordinates are scaled by w/h to pixel positions.
        pts = {}
        for joint in joint_names:
            lm = lms[LM[f'{side}_{joint}']]
            pts[joint] = (lm.x * w, lm.y * h)

        gait[f'{side}_knee'] = round(_angle(pts['HIP'], pts['KNEE'], pts['ANKLE']), 1)
        gait[f'{side}_ankle'] = round(_angle(pts['KNEE'], pts['ANKLE'], pts['FOOT']), 1)
        gait[f'{side}_hip'] = round(_angle(pts['SHOULDER'], pts['HIP'], pts['KNEE']), 1)
        gait[f'{side}_shin'] = round(_vert(pts['KNEE'], pts['ANKLE']), 1)
        gait[f'{side}_lean'] = round(_vert(pts['SHOULDER'], pts['HIP']), 1)
        gait[f'{side}_foot_tilt'] = round(_foot_tilt(pts['HEEL'], pts['FOOT']), 1)

    # Left/right averages, plus knee-angle difference as the symmetry measure.
    for metric in ['knee', 'ankle', 'hip', 'shin', 'lean', 'foot_tilt']:
        gait[f'avg_{metric}'] = round((gait[f'L_{metric}'] + gait[f'R_{metric}']) / 2, 1)
    gait['symmetry'] = round(abs(gait['L_knee'] - gait['R_knee']), 1)

    return gait, lms, foot_y
core/risk_engine.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Risk fusion engine — contextual, trend-aware, with user profiles.
2
+
3
+ Improvements over POC:
4
+ 1. Contextual: bent knees on flat ground ≠ bent knees on a slope
5
+ 2. Trend-based: steepening slope is riskier than steady slope
6
+ 3. Obstacle velocity: closing obstacles score higher than static ones
7
+ 4. User profiles: elderly/athletic/default adjust sensitivity
8
+ 5. Temporal: risk decays slowly (no flicker between HIGH/LOW)
9
+ """
10
+ from config import RISK_HIGH, RISK_MEDIUM, RISK_LOW, RISK_PROFILE
11
+
12
+ # ─── User profiles: multipliers on base scores ───
13
+ _PROFILES = {
14
+ "default": {"slope": 1.0, "gait": 1.0, "obstacle": 1.0, "compound": 1.0},
15
+ "elderly": {"slope": 1.5, "gait": 1.5, "obstacle": 1.3, "compound": 1.5},
16
+ "athletic": {"slope": 0.6, "gait": 0.5, "obstacle": 1.0, "compound": 0.5},
17
+ }
18
+
19
+
20
def _get_profile():
    """Return the multiplier table for RISK_PROFILE, or the default table."""
    fallback = _PROFILES["default"]
    return _PROFILES.get(RISK_PROFILE, fallback)
22
+
23
+
24
class RiskEngine:
    """Stateful risk engine with temporal smoothing and context awareness.

    Args:
        profile: name of a profile in ``_PROFILES``; falls back to
            ``RISK_PROFILE`` when omitted and to "default" when unknown.
    """

    def __init__(self, profile=None):
        name = profile or RISK_PROFILE
        if name not in _PROFILES:
            name = "default"
        # Keep the effective name so results report the profile actually used
        # (previously the global RISK_PROFILE was always reported).
        self.profile_name = name
        self.profile = _PROFILES[name]
        self._prev_score = 0
        self._prev_level = "SAFE"
        self._decay = 0.7  # risk decays slowly to prevent flicker

    def assess(self, slope_angle, slope_dir, gait, obstacles,
               slope_trend="STABLE", new_obstacles=None, closing_obstacles=None):
        """Compute contextual risk from all signals.

        Args:
            slope_angle: signed slope angle in degrees.
            slope_dir: "UPHILL", "DOWNHILL" or another label.
            gait: dict with 'avg_knee', 'avg_shin', 'avg_lean', 'symmetry',
                or a falsy value when no pose is available.
            obstacles: list of obstacle dicts, or a plain obstacle count.
            slope_trend: "STEEPENING", "FLATTENING" or "STABLE".
            new_obstacles / closing_obstacles: optional lists of obstacle
                dicts; only used when ``obstacles`` is a list.

        Returns:
            dict with keys risk, score, raw_score, terrain, terrain_slope,
            slope_trend, reasons, gait_summary, components, profile.
        """
        sa = abs(slope_angle)
        p = self.profile
        score = 0
        reasons = []
        components = {}

        # ── 1. Terrain slope ──
        s_score = 0
        if sa > 20:
            s_score = 40
            reasons.append(f"steep slope ({slope_angle:.0f}°)")
        elif sa > 10:
            s_score = 20
            reasons.append(f"moderate slope ({slope_angle:.0f}°)")
        elif sa > 3:
            s_score = 5
            reasons.append(f"mild slope ({slope_angle:.0f}°)")

        # Trend bonus: steepening is riskier than steady
        if slope_trend == "STEEPENING":
            s_score = int(s_score * 1.4)
            reasons.append("slope steepening")
        elif slope_trend == "FLATTENING" and s_score > 0:
            s_score = int(s_score * 0.7)

        s_score = int(s_score * p["slope"])
        components['slope'] = s_score
        score += s_score

        # ── 2. Gait analysis (contextual) ──
        g_score = 0
        gait_summary = {}
        if gait:
            kn = gait['avg_knee']
            sh = abs(gait['avg_shin'])
            lean = gait['avg_lean']   # signed: needed for the direction checks
            ln = abs(lean)
            sy = gait['symmetry']
            gait_summary = {'knee': kn, 'shin': sh, 'lean': ln, 'symmetry': sy}

            # Bent knees: only risky if NOT on a slope (on slopes it's expected adaptation)
            if kn < 130:
                if sa < 5:
                    # Bent knees on flat = potential instability
                    g_score += 25
                    reasons.append(f"heavily bent knees on flat ({kn:.0f}°)")
                else:
                    # Bent knees on slope = expected, mild concern only if extreme
                    if kn < 110:
                        g_score += 15
                        reasons.append(f"extreme knee bend ({kn:.0f}°)")
            elif kn < 150:
                if sa < 5:
                    g_score += 10
                    reasons.append(f"bent knees on flat ({kn:.0f}°)")

            # Shin tilt: contextual — expected to tilt on slopes
            expected_shin = sa * 0.4  # rough expected shin tilt for slope
            excess_shin = max(0, sh - expected_shin)
            if excess_shin > 15:
                g_score += 15
                reasons.append(f"excess shin tilt ({sh:.0f}° vs expected {expected_shin:.0f}°)")
            elif excess_shin > 8:
                g_score += 8

            # Body lean: expected on slopes, risky if opposite direction.
            # The signed value is used here; testing abs(lean) < -10 could
            # never be true, which made the uphill branch unreachable.
            # NOTE(review): assumes negative avg_lean means backward lean,
            # as the original thresholds imply — confirm against the camera
            # orientation used with pose.analyze.
            if slope_dir == "UPHILL" and lean < -10:
                g_score += 20
                reasons.append(f"leaning backward on uphill ({lean:.0f}°)")
            elif slope_dir == "DOWNHILL" and lean > 10:
                g_score += 20
                reasons.append(f"leaning forward on downhill ({lean:.0f}°)")
            elif ln > 20:
                g_score += 10
                reasons.append(f"excessive lean ({ln:.0f}°)")

            # Asymmetry: always concerning
            if sy > 25:
                g_score += 25
                reasons.append(f"severe gait asymmetry ({sy:.0f}°)")
            elif sy > 15:
                g_score += 12
                reasons.append(f"gait asymmetry ({sy:.0f}°)")

        g_score = int(g_score * p["gait"])
        components['gait'] = g_score
        score += g_score

        # ── 3. Obstacles ──
        o_score = 0
        obstacle_list = obstacles if isinstance(obstacles, list) else None

        if obstacle_list is not None:
            near = [o for o in obstacle_list if o.get('proximity', 0) > 0.6]
            o_score += min(20, len(near) * 10)
            if len(near) >= 2:
                reasons.append(f"{len(near)} obstacles nearby")

            # Closing obstacles are more dangerous
            if closing_obstacles:
                o_score += min(20, len(closing_obstacles) * 12)
                for ob in closing_obstacles[:2]:
                    rate = ob.get('closing_rate', 0)
                    reasons.append(f"{ob['label']} closing ({rate:.0%}/frame)")

            # New obstacles: brief awareness bump
            if new_obstacles:
                o_score += min(10, len(new_obstacles) * 5)
        else:
            # Legacy call style: `obstacles` is a plain count.
            o_score += min(30, obstacles * 10)

        o_score = int(o_score * p["obstacle"])
        components['obstacles'] = o_score
        score += o_score

        # ── 4. Compound risks ──
        c_score = 0
        if gait and sa > 10:
            kn = gait['avg_knee']
            sy = gait['symmetry']
            if kn < 150 and sy > 15:
                c_score += 20
                reasons.append("slope + bent knees + asymmetry")
            elif kn < 150:
                c_score += 12
                reasons.append("slope + bent knees")
            elif sy > 15:
                c_score += 12
                reasons.append("slope + asymmetry")

        if obstacle_list is not None:
            very_near = [o for o in obstacle_list if o.get('proximity', 0) > 0.7]
            if very_near and sa > 10:
                c_score += 15
                reasons.append("slope + near obstacle")

        c_score = int(c_score * p["compound"])
        components['compound'] = c_score
        score += c_score

        # ── 5. Temporal smoothing (prevent flicker) ──
        raw_score = min(100, score)
        smoothed = self._decay * self._prev_score + (1 - self._decay) * raw_score

        # Snap up fast (danger), decay down slowly (safety)
        if raw_score > self._prev_score:
            smoothed = max(smoothed, raw_score * 0.85)  # jump up quickly

        self._prev_score = smoothed
        final_score = int(smoothed)

        # Level
        if final_score >= RISK_HIGH:
            level = "HIGH"
        elif final_score >= RISK_MEDIUM:
            level = "MEDIUM"
        elif final_score >= RISK_LOW:
            level = "LOW"
        else:
            level = "SAFE"

        self._prev_level = level

        return {
            'risk': level,
            'score': final_score,
            'raw_score': raw_score,
            'terrain': slope_dir,
            'terrain_slope': round(slope_angle, 1),
            'slope_trend': slope_trend,
            'reasons': reasons,
            'gait_summary': gait_summary,
            'components': components,
            'profile': self.profile_name,
        }
213
+
214
+
215
+ # ─── Backward-compatible module-level function ───
216
+ _default_engine = None
217
+
218
+
219
def assess(slope_angle, slope_dir, gait, num_obstacles,
           slope_trend="STABLE", new_obstacles=None, closing_obstacles=None):
    """Convenience wrapper around one shared, lazily created RiskEngine.

    The shared engine keeps its smoothing state between calls, so results
    depend on earlier calls. For video, use a dedicated RiskEngine instance.
    """
    global _default_engine
    engine = _default_engine
    if engine is None:
        engine = RiskEngine()
        _default_engine = engine
    return engine.assess(
        slope_angle, slope_dir, gait, num_obstacles,
        slope_trend, new_obstacles, closing_obstacles)
core/tts.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text-to-speech engine with multiple backends and async playback.
2
+
3
+ Backends (tried in order):
4
+ 1. edge-tts — Microsoft Edge TTS (async, high quality, needs internet)
5
+ 2. pyttsx3 — offline, cross-platform
6
+ 3. espeak — CLI fallback (Linux)
7
+
8
+ Install one: pip install pyttsx3 OR pip install edge-tts
9
+ """
10
+ import threading
11
+ import queue
12
+ import time
13
+ import os
14
+ import tempfile
15
+
16
+
17
class TTSEngine:
    """Non-blocking TTS that speaks in a background thread.

    Backends are probed in this order: edge-tts, pyttsx3, espeak CLI. When
    none is available the engine disables itself and ``speak`` is a no-op.

    Args:
        enabled: when False, no backend is probed and no thread is started.
    """

    def __init__(self, enabled=True):
        self.enabled = enabled
        self._backend = None
        self._queue = queue.Queue(maxsize=5)  # drop old messages if backed up
        self._thread = None
        self._stop = threading.Event()

        if enabled:
            self._backend = self._detect_backend()
            if self._backend:
                self._thread = threading.Thread(target=self._worker, daemon=True)
                self._thread.start()
            else:
                print("[TTS] No backend available. Install: pip install pyttsx3")
                self.enabled = False

    def _detect_backend(self):
        """Return a (name, engine_or_None) tuple for the first usable backend, or None."""
        import shutil

        # Try edge-tts first — natural assistant voice
        try:
            import edge_tts  # noqa: F401  (availability probe only)
            print("[TTS] Using edge-tts (GuyNeural)")
            return ('edge_tts', None)
        except ImportError:
            pass

        # Try pyttsx3. init() can fail in many ways (missing driver, no audio
        # device), so any exception just means "try the next backend".
        try:
            import pyttsx3
            engine = pyttsx3.init()
            engine.setProperty('rate', 170)
            engine.setProperty('volume', 1.0)
            print("[TTS] Using pyttsx3")
            return ('pyttsx3', engine)
        except Exception:
            pass

        # Try espeak CLI
        if shutil.which("espeak"):
            print("[TTS] Using espeak CLI")
            return ('espeak', None)

        return None

    def speak(self, text):
        """Queue text for speaking. Non-blocking. Drops the oldest item if full."""
        if not self.enabled or not text:
            return
        try:
            self._queue.put_nowait(text)
        except queue.Full:
            # Drop oldest, add new
            try:
                self._queue.get_nowait()
            except queue.Empty:
                pass
            try:
                self._queue.put_nowait(text)
            except queue.Full:
                pass

    def _worker(self):
        """Background thread that processes the speech queue."""
        while not self._stop.is_set():
            try:
                text = self._queue.get(timeout=0.5)
            except queue.Empty:
                continue

            # Drain queue — only speak the latest message
            latest = text
            while not self._queue.empty():
                try:
                    latest = self._queue.get_nowait()
                except queue.Empty:
                    break

            self._speak_sync(latest)

    def _speak_sync(self, text):
        """Speak ``text`` with the selected backend; errors are logged, not raised."""
        import subprocess

        name, engine = self._backend
        try:
            if name == 'pyttsx3':
                engine.say(text)
                engine.runAndWait()
            elif name == 'edge_tts':
                self._speak_edge(text)
            elif name == 'espeak':
                # -s = speed (words per minute), -a = amplitude.
                # Argument list instead of a shell string: the text is never
                # parsed by a shell, so quotes, $() or backticks are harmless.
                subprocess.run(
                    ["espeak", "-s", "170", "-a", "200", text],
                    stderr=subprocess.DEVNULL, check=False)
        except Exception as e:
            print(f"[TTS] Error: {e}")

    def _speak_edge(self, text):
        """Synthesize with edge-tts (async API) and play the result synchronously."""
        import asyncio
        import shutil
        import subprocess
        import edge_tts

        # Private temp directory: no fixed shared filename that another
        # process could collide with, and files are removed after playback.
        with tempfile.TemporaryDirectory(prefix="nav_tts_") as workdir:
            mp3 = os.path.join(workdir, "speech.mp3")

            async def _gen():
                comm = edge_tts.Communicate(text, "en-US-GuyNeural", rate="+15%")
                await comm.save(mp3)

            asyncio.run(_gen())

            # Play with ffplay or aplay
            if shutil.which("ffplay"):
                subprocess.run(
                    ["ffplay", "-nodisp", "-autoexit", "-loglevel", "error", mp3],
                    check=False)
            elif shutil.which("aplay"):
                # Convert to wav first
                wav = os.path.join(workdir, "speech.wav")
                subprocess.run(
                    ["ffmpeg", "-y", "-loglevel", "error", "-i", mp3, wav],
                    stderr=subprocess.DEVNULL, check=False)
                subprocess.run(["aplay", wav], stderr=subprocess.DEVNULL, check=False)

    def shutdown(self):
        """Stop the worker thread and, for pyttsx3, stop the engine."""
        self._stop.set()
        if self._thread:
            self._thread.join(timeout=2)
        if self._backend and self._backend[0] == 'pyttsx3':
            try:
                self._backend[1].stop()
            except Exception:
                pass
core/tts_render.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Render TTS voice guidance as audio segments and merge into video.
2
+
3
+ Generates .wav clips for each unique voice message, then composites
4
+ them onto the video timeline using ffmpeg.
5
+ """
6
+ import os
7
+ import tempfile
8
+ import json
9
+
10
+
11
def render_voice_track(voice_events, output_audio_path, total_duration):
    """Generate a single audio track from timestamped voice events.

    Each distinct message is synthesized once, then clips are laid out on the
    timeline with generated silence between them and concatenated by ffmpeg
    into a mono 22.05 kHz 16-bit WAV.

    Args:
        voice_events: iterable of ``(timestamp_seconds, text)`` pairs.
        output_audio_path: where the finished WAV track is written.
        total_duration: target length in seconds; trailing silence is added
            when the last clip ends earlier.

    Returns:
        True when the output file exists and is larger than 100 bytes,
        False when no backend is available or nothing could be synthesized.
    """
    backend = _detect_backend()
    if not backend:
        print("[TTS-Render] No TTS backend. Install: pip install edge-tts")
        return False

    events = sorted(voice_events, key=lambda ev: ev[0])

    # The context manager removes the scratch directory on every exit path,
    # including the early returns below.
    with tempfile.TemporaryDirectory(prefix="nav_tts_") as tmpdir:
        # Deduplicate and synthesize unique texts (first-seen order)
        clip_map = {}
        for i, text in enumerate(dict.fromkeys(text for _, text in events)):
            clip_path = os.path.join(tmpdir, f"clip_{i}.wav")
            _synth(backend, text, clip_path)
            if os.path.exists(clip_path) and os.path.getsize(clip_path) > 100:
                clip_map[text] = clip_path

        if not clip_map:
            return False

        # Build the timeline: silence gaps + voice clips at their timestamps.
        segments = []
        durations = {}  # clip path -> seconds, probed once per clip
        cursor = 0.0    # actual end of the audio laid out so far

        for ts, text in events:
            clip = clip_map.get(text)
            if clip is None:
                continue

            gap = ts - cursor
            if gap > 0.05:
                silence_path = os.path.join(tmpdir, f"silence_{len(segments)}.wav")
                if _write_silence(silence_path, gap):
                    segments.append(silence_path)
                    cursor = ts

            if clip not in durations:
                durations[clip] = _probe_clip_duration(clip)
            segments.append(clip)
            # Advance from where the clip really starts. When the previous
            # clip is still playing (gap <= 0) the clip starts late, and
            # using ts here would make every later clip drift.
            cursor += durations[clip]

        # Add trailing silence to match video duration
        if cursor < total_duration:
            trail = os.path.join(tmpdir, "silence_trail.wav")
            if _write_silence(trail, total_duration - cursor):
                segments.append(trail)

        if not segments:
            return False

        # Write concat list (single quotes escaped per the concat demuxer syntax)
        concat_file = os.path.join(tmpdir, "concat.txt")
        with open(concat_file, 'w', encoding="utf-8") as f:
            for seg in segments:
                escaped = seg.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # Concatenate all segments
        _run_media_tool([
            "ffmpeg", "-y", "-loglevel", "error",
            "-f", "concat", "-safe", "0", "-i", concat_file,
            "-c:a", "pcm_s16le", "-ar", "22050", "-ac", "1",
            output_audio_path,
        ])

    return os.path.exists(output_audio_path) and os.path.getsize(output_audio_path) > 100


def _run_media_tool(argv):
    """Run an ffmpeg/ffprobe argv list without a shell and return its stdout."""
    import subprocess

    try:
        proc = subprocess.run(argv, stdout=subprocess.PIPE, text=True, check=False)
    except OSError as exc:
        print(f"[TTS-Render] Cannot run {argv[0]}: {exc}")
        return ""
    return proc.stdout


def _write_silence(path, seconds):
    """Write ``seconds`` of mono 22.05 kHz silence to ``path``; True on success."""
    _run_media_tool([
        "ffmpeg", "-y", "-loglevel", "error",
        "-f", "lavfi", "-i", "anullsrc=r=22050:cl=mono",
        "-t", f"{seconds:.3f}", path,
    ])
    return os.path.exists(path)


def _probe_clip_duration(path, default=2.0):
    """Return the clip length in seconds via ffprobe, or ``default`` if unknown."""
    raw = _run_media_tool([
        "ffprobe", "-i", path, "-show_entries", "format=duration",
        "-v", "error", "-of", "csv=p=0",
    ]).strip()
    try:
        return float(raw)
    except ValueError:
        return default
94
+
95
+
96
def merge_voice_into_video(video_path, voice_events, total_duration):
    """Add TTS voice track to an existing video file. Returns new path.

    The voice track is rendered into a private temporary directory, then
    muxed with ffmpeg: mixed with the existing audio when the video has an
    audio stream, otherwise added as the only audio stream.

    Args:
        video_path: path of the video to augment (left untouched).
        voice_events: iterable of ``(timestamp_seconds, text)`` pairs.
        total_duration: video duration in seconds.

    Returns:
        Path of the ``*_voiced.mp4`` file on success, or ``video_path``
        unchanged when rendering or muxing fails.
    """
    import subprocess

    # Derive the output from the stem so a non-".mp4" input can never map
    # onto itself (ffmpeg would otherwise read and overwrite the same file).
    stem, _ext = os.path.splitext(video_path)
    output = f"{stem}_voiced.mp4"

    with tempfile.TemporaryDirectory(prefix="nav_voice_") as workdir:
        voice_track = os.path.join(workdir, "nav_voice_track.wav")

        if not render_voice_track(voice_events, voice_track, total_duration):
            return video_path  # fallback: return original

        try:
            # Check if video already has audio. Only stdout is inspected so
            # an ffprobe error message is not mistaken for an audio stream.
            probe = subprocess.run(
                ["ffprobe", "-i", video_path, "-show_streams",
                 "-select_streams", "a", "-loglevel", "error"],
                stdout=subprocess.PIPE, text=True, check=False,
            )
            has_audio = bool(probe.stdout.strip())

            if has_audio:
                # Mix TTS with existing audio, use longest duration
                stream_args = [
                    "-filter_complex",
                    "[0:a][1:a]amix=inputs=2:duration=longest:dropout_transition=0[a]",
                    "-map", "0:v", "-map", "[a]",
                ]
            else:
                # Add TTS as the only audio
                stream_args = ["-map", "0:v", "-map", "1:a"]

            mux = subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error",
                 "-i", video_path, "-i", voice_track,
                 *stream_args,
                 "-c:v", "copy", "-c:a", "aac", "-shortest", output],
                check=False,
            )
        except OSError as exc:
            print(f"[TTS-Render] Cannot run ffmpeg/ffprobe: {exc}")
            return video_path

    # Require a clean exit as well as a non-empty file, so a leftover output
    # from an earlier run is not returned after a failed mux.
    if mux.returncode == 0 and os.path.exists(output) and os.path.getsize(output) > 0:
        return output
    return video_path
130
+
131
+
132
def _detect_backend():
    """Return the name of the first usable TTS backend, or None.

    Preference order: ``"edge_tts"`` (natural assistant voice), then the
    ``espeak`` command-line tool, then ``"pyttsx3"``.
    """
    import shutil

    # Prefer edge-tts — natural assistant voice
    try:
        import edge_tts  # noqa: F401  (imported only to test availability)
        return "edge_tts"
    except ImportError:
        pass
    # shutil.which searches PATH directly: no shell, works on every platform
    if shutil.which("espeak"):
        return "espeak"
    try:
        import pyttsx3  # noqa: F401  (imported only to test availability)
        return "pyttsx3"
    except ImportError:
        pass
    return None
147
+
148
+
149
def _synth(backend, text, out_path):
    """Synthesize text to a .wav file.

    Args:
        backend: ``"edge_tts"``, ``"espeak"`` or ``"pyttsx3"``; any other
            value does nothing.
        text: the sentence to speak.
        out_path: destination WAV path.

    Failures are printed and swallowed; callers detect them by checking
    whether ``out_path`` was produced.
    """
    import subprocess

    try:
        if backend == "edge_tts":
            import asyncio
            import edge_tts

            # Intermediate file name derived by appending, so it can never
            # collide with out_path whatever its extension is.
            mp3 = f"{out_path}.tmp.mp3"

            async def _gen():
                # en-US-GuyNeural: clear male assistant voice
                # rate=+15% for snappy navigation feel
                c = edge_tts.Communicate(text, "en-US-GuyNeural", rate="+15%")
                await c.save(mp3)

            try:
                asyncio.run(_gen())
                subprocess.run(
                    ["ffmpeg", "-y", "-loglevel", "error", "-i", mp3,
                     "-ar", "22050", "-ac", "1", out_path],
                    check=False,
                )
            finally:
                # Remove the intermediate mp3 even when synthesis/convert fails
                if os.path.exists(mp3):
                    os.remove(mp3)
        elif backend == "espeak":
            # argv list, no shell: the text cannot be interpreted as a command
            subprocess.run(
                ["espeak", "-s", "170", "-w", out_path, text],
                stderr=subprocess.DEVNULL,
                check=False,
            )
        elif backend == "pyttsx3":
            import pyttsx3
            engine = pyttsx3.init()
            engine.setProperty('rate', 170)
            # pyttsx3 queues file output with save_to_file(text, filename);
            # runAndWait() then performs the write.
            engine.save_to_file(text, out_path)
            engine.runAndWait()
    except Exception as e:
        print(f"[TTS-Render] Synth error: {e}")
deploy_ec2.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Deploy Blind Navigation App to EC2
# Run this ON the EC2 instance after uploading blind_nav_app.tar.gz
# to your home directory (~/blind_nav_app.tar.gz).

set -e

ARCHIVE="$HOME/blind_nav_app.tar.gz"

# Fail early with a clear message, before the (slow) apt step, instead of
# stopping later on a bare tar error.
if [ ! -f "$ARCHIVE" ]; then
    echo "Error: $ARCHIVE not found. Upload blind_nav_app.tar.gz to your home directory first." >&2
    exit 1
fi

echo "=== Installing system dependencies ==="
sudo apt-get update
# NOTE(review): libgl1-mesa-glx is a transitional package on newer
# Debian/Ubuntu releases — confirm it is installable on the target AMI
# (libgl1 is the usual replacement).
sudo apt-get install -y python3-pip python3-venv ffmpeg espeak libgl1-mesa-glx

echo "=== Setting up app ==="
mkdir -p ~/blind_nav && cd ~/blind_nav
tar -xzf "$ARCHIVE"

echo "=== Creating virtual environment ==="
python3 -m venv venv
source venv/bin/activate

echo "=== Installing Python packages ==="
pip install --upgrade pip
pip install -r requirements.txt

echo "=== Done! ==="
echo ""
echo "To run:"
echo "  cd ~/blind_nav"
echo "  source venv/bin/activate"
echo "  python app_live.py"
echo ""
echo "App will be at: http://<your-ec2-public-ip>:7860"
echo "Make sure port 7860 is open in your EC2 security group!"
pipeline.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unified video/image/webcam pipeline using core modules."""
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ import time
6
+ import tempfile
7
+
8
+ from config import DEFAULT_SKIP_FRAMES, DEFAULT_DEPTH_EVERY
9
+ from core import detector, depth, pose, risk_engine
10
+ from core.detector import ObstacleTracker
11
+ from core.depth import SlopeSmoother
12
+ from core.guidance import GuidanceEngine
13
+ from core.tts import TTSEngine
14
+ from renderers import overlay as overlay_renderer
15
+ from renderers import blind_nav as blind_nav_renderer
16
+
17
+
18
def process_image(image_bgr, mode="overlay"):
    """Run the full analysis on one BGR image.

    Returns a ``(rendered_bgr, risk_dict, guidance_dict)`` tuple. ``mode``
    selects the 2-panel ``"blind_nav"`` layout; any other value produces the
    single-frame HUD overlay.
    """
    height, width = image_bgr.shape[:2]
    rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # Perception: obstacles, body pose, depth and the slope derived from it
    obstacles = detector.detect(image_bgr)
    gait, landmarks, foot_y = pose.analyze(rgb, width, height)
    depth_norm = depth.estimate_depth(rgb, height, width)
    slope_angle, slope_dir, terrain, _ground = depth.estimate_slope(
        depth_norm, height, width, foot_y)

    # NOTE(review): this passes an obstacle *count*, while process_video and
    # run_webcam pass the obstacle list itself — confirm which form
    # risk_engine.assess expects.
    risk = risk_engine.assess(slope_angle, slope_dir, gait, len(obstacles))

    # A fresh engine per image: single images carry no guidance history
    guidance = GuidanceEngine().compute(slope_angle, slope_dir, terrain, obstacles)

    if mode != "blind_nav":
        minimap = overlay_renderer.render_depth_mini(depth_norm, width, height)
        rendered = overlay_renderer.render(
            image_bgr, obstacles, gait, landmarks, risk, guidance, minimap)
        return rendered, risk, guidance

    rendered = blind_nav_renderer.render(
        image_bgr, obstacles, slope_angle, slope_dir, terrain, depth_norm, guidance)
    return rendered, risk, guidance
41
+
42
+
43
def process_video(video_path, mode="overlay", skip_frames=DEFAULT_SKIP_FRAMES,
                  depth_every=DEFAULT_DEPTH_EVERY, track=True, tts=False):
    """Process video file. Returns output video path.

    Every ``skip_frames + 1``-th frame is analysed and written at a
    proportionally reduced frame rate, so the output keeps the duration of
    the source. The result is re-encoded to H.264, the original audio is
    muxed back in, and spoken guidance is baked into the audio track.

    Args:
        video_path: source video file.
        mode: ``"blind_nav"`` for the 2-panel layout, anything else for the
            HUD overlay.
        skip_frames: number of frames dropped between processed frames.
        depth_every: run depth estimation on every N-th processed frame.
        track: forwarded to ``detector.detect`` to enable tracking.
        tts: also speak guidance live while processing.

    Returns:
        Path of the final video, or None when the source cannot be opened.
    """
    import subprocess

    def _ffmpeg(*args):
        """Run ffmpeg without a shell; True only when it exited with 0."""
        try:
            done = subprocess.run(
                ["ffmpeg", "-y", "-loglevel", "error", *args], check=False)
        except OSError as exc:
            print(f"ffmpeg unavailable: {exc}", flush=True)
            return False
        return done.returncode == 0

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # Some containers report 0/NaN; fall back so the writer frame rate
        # and the timestamp maths below stay valid.
        fps = 30.0
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    stride = skip_frames + 1
    out_fps = fps / stride
    original_path = video_path  # keep for audio mux later

    if mode == "blind_nav":
        out_size = (w * 2, h + 70)
    else:
        out_size = (w, h)

    out_path = os.path.join(tempfile.gettempdir(), f"nav_{mode}.mp4")
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), out_fps, out_size)

    # Stateful components
    guide_engine = GuidanceEngine()
    slope_smoother = SlopeSmoother()
    obs_tracker = ObstacleTracker()
    tts_engine = TTSEngine(enabled=tts)
    voice_events = []  # (timestamp, text) for offline TTS rendering

    cached_depth = np.zeros((h, w), dtype=np.float32)
    cached_depth_mini = None

    frame_idx = processed = 0
    t0 = time.time()
    print(f"Processing {total} frames ({w}x{h}) mode={mode}...", flush=True)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_idx += 1
            if frame_idx % stride != 0:
                continue

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Obstacles with tracking
            obstacles = detector.detect(frame, track=track)
            new_obs, closing_obs, lost_ids = obs_tracker.update(obstacles)

            # Pose
            gait, landmarks, foot_y = pose.analyze(rgb, w, h)

            # Depth (every N processed frames, and always on the first one)
            if frame_idx % (depth_every * stride) == 0 or frame_idx <= stride:
                cached_depth = depth.estimate_depth(rgb, h, w)
                raw_slope, raw_dir, raw_terrain, _gs = depth.estimate_slope(
                    cached_depth, h, w, foot_y)
                slope_smoother.update(raw_slope, raw_dir, raw_terrain)
                cached_depth_mini = overlay_renderer.render_depth_mini(cached_depth, w, h)

            # Use smoothed values
            s_angle = slope_smoother.angle
            s_dir = slope_smoother.direction
            s_terrain = slope_smoother.terrain
            s_trend = slope_smoother.trend

            # Risk + guidance
            risk = risk_engine.assess(
                s_angle, s_dir, gait, obstacles,
                slope_trend=s_trend,
                new_obstacles=new_obs,
                closing_obstacles=closing_obs)
            guidance = guide_engine.compute(
                s_angle, s_dir, s_terrain, obstacles,
                slope_trend=s_trend,
                new_obstacles=new_obs,
                closing_obstacles=closing_obs)

            # Render
            if mode == "blind_nav":
                rendered = blind_nav_renderer.render(
                    frame, obstacles, s_angle, s_dir, s_terrain,
                    cached_depth, guidance)
            else:
                rendered = overlay_renderer.render(
                    frame, obstacles, gait, landmarks, risk, guidance, cached_depth_mini)

            writer.write(rendered)
            processed += 1

            # TTS: speak if throttle allows
            if guide_engine.should_speak(guidance['voice'], s_angle):
                timestamp = frame_idx / fps
                voice_events.append((timestamp, guidance['voice']))
                if tts_engine.enabled:
                    tts_engine.speak(guidance['voice'])

            if processed % 20 == 0:
                el = time.time() - t0
                new_str = f" new={len(new_obs)}" if new_obs else ""
                close_str = f" closing={len(closing_obs)}" if closing_obs else ""
                print(f"  {frame_idx}/{total} | {processed / el:.1f}fps | "
                      f"{s_dir} {s_angle:.0f}° [{s_trend}] | {s_terrain} | "
                      f"obs={len(obstacles)}{new_str}{close_str} | risk={risk['risk']}",
                      flush=True)
    finally:
        # Release handles and stop the speech thread even if a frame fails
        cap.release()
        writer.release()
        tts_engine.shutdown()

    # Re-encode to H.264 and mux audio from original
    stem = os.path.splitext(out_path)[0]
    h264 = f"{stem}_h264.mp4"
    # First: encode video to H.264
    encoded = _ffmpeg("-i", out_path, "-c:v", "libx264", "-preset", "fast",
                      "-crf", "23", "-pix_fmt", "yuv420p", h264)
    if encoded and os.path.exists(h264) and os.path.getsize(h264) > 0:
        os.remove(out_path)
        out_path = h264

    # Mux original audio. Skipped frames are compensated by the lower output
    # frame rate, so the video already has the source duration and the audio
    # must be copied at its original tempo; speeding it up would end the
    # audio early and make -shortest truncate the video.
    if original_path and os.path.exists(original_path):
        with_audio = f"{os.path.splitext(out_path)[0]}_audio.mp4"
        muxed = _ffmpeg("-i", out_path, "-i", original_path,
                        "-map", "0:v", "-map", "1:a",
                        "-c:v", "copy", "-c:a", "aac", "-shortest", with_audio)
        if muxed and os.path.exists(with_audio) and os.path.getsize(with_audio) > 0:
            os.remove(out_path)
            out_path = with_audio

    print(f"Done! {processed} frames in {time.time() - t0:.1f}s", flush=True)

    # Bake TTS voice into video if we have events
    if voice_events:
        from core.tts_render import merge_voice_into_video
        duration = total / fps
        print(f"Rendering {len(voice_events)} voice events into video...", flush=True)
        out_path = merge_voice_into_video(out_path, voice_events, duration)

    return out_path
185
+
186
+
187
def run_webcam(mode="overlay", camera_id=0, tts=True):
    """Live webcam/RTSP processing loop. Press 'q' to quit.

    Args:
        mode: ``"blind_nav"`` for the 2-panel layout, anything else for the
            HUD overlay.
        camera_id: 0 for USB webcam, or "rtsp://..." for IP camera
        tts: speak guidance aloud when the guidance throttle allows it.

    The camera, the display window and the TTS worker are always released,
    including when the loop is interrupted by an exception.
    """
    from core.camera import CameraStream

    try:
        cam = CameraStream(camera_id)
    except RuntimeError as e:
        print(f"Error: {e}")
        return

    guide_engine = GuidanceEngine()
    slope_smoother = SlopeSmoother()
    obs_tracker = ObstacleTracker()
    tts_engine = TTSEngine(enabled=tts)
    cached_depth = None
    cached_depth_mini = None
    depth_counter = 0
    frame_count = 0
    t0 = time.time()

    print(f"Live mode={mode} tts={tts}. Press 'q' to quit.", flush=True)
    try:
        while cam.is_open:
            frame = cam.read()
            if frame is None:
                # No frame available yet: back off briefly instead of spinning
                time.sleep(0.01)
                continue

            h, w = frame.shape[:2]
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            obstacles = detector.detect(frame, track=True)
            new_obs, closing_obs, _ = obs_tracker.update(obstacles)
            gait, landmarks, foot_y = pose.analyze(rgb, w, h)

            # Depth is refreshed every DEFAULT_DEPTH_EVERY frames and always
            # on the first frame, so cached_depth is never None below.
            depth_counter += 1
            if depth_counter % DEFAULT_DEPTH_EVERY == 0 or cached_depth is None:
                cached_depth = depth.estimate_depth(rgb, h, w)
                raw_slope, raw_dir, raw_terrain, _ = depth.estimate_slope(
                    cached_depth, h, w, foot_y)
                slope_smoother.update(raw_slope, raw_dir, raw_terrain)
                cached_depth_mini = overlay_renderer.render_depth_mini(cached_depth, w, h)

            risk = risk_engine.assess(
                slope_smoother.angle, slope_smoother.direction, gait, obstacles,
                slope_trend=slope_smoother.trend,
                new_obstacles=new_obs,
                closing_obstacles=closing_obs)
            guidance = guide_engine.compute(
                slope_smoother.angle, slope_smoother.direction, slope_smoother.terrain,
                obstacles, slope_trend=slope_smoother.trend,
                new_obstacles=new_obs, closing_obstacles=closing_obs)

            if mode == "blind_nav":
                rendered = blind_nav_renderer.render(
                    frame, obstacles, slope_smoother.angle, slope_smoother.direction,
                    slope_smoother.terrain, cached_depth, guidance)
            else:
                rendered = overlay_renderer.render(
                    frame, obstacles, gait, landmarks, risk, guidance, cached_depth_mini)

            if tts_engine.enabled and guide_engine.should_speak(
                    guidance['voice'], slope_smoother.angle):
                tts_engine.speak(guidance['voice'])

            frame_count += 1
            if frame_count % 30 == 0:
                fps = frame_count / (time.time() - t0)
                cv2.setWindowTitle("Navigation", f"Navigation | {fps:.1f} FPS | {risk['risk']}")

            cv2.imshow("Navigation", rendered)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()
        tts_engine.shutdown()

    print(f"Session: {frame_count} frames in {time.time() - t0:.1f}s", flush=True)
268
+
269
+
270
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage:")
        print("  python pipeline.py <video.mp4> [--mode overlay|blind_nav] [--tts]")
        print("  python pipeline.py --webcam [--cam 0] [--mode overlay|blind_nav] [--tts]")
        print("  python pipeline.py --webcam --cam rtsp://user:pass@ip:554/stream [--tts]")
        sys.exit(1)

    def _option_value(flag, default):
        """Return the argument that follows ``flag``, or ``default`` if absent."""
        if flag not in sys.argv:
            return default
        idx = sys.argv.index(flag)
        return sys.argv[idx + 1] if idx + 1 < len(sys.argv) else default

    mode = _option_value("--mode", "overlay")
    use_tts = "--tts" in sys.argv

    # A numeric --cam selects a local device index; anything else (an
    # rtsp:// or http:// URL, a device path) is passed through as a string
    # instead of crashing in int().
    cam_arg = _option_value("--cam", "0")
    try:
        cam_source = int(cam_arg)
    except ValueError:
        cam_source = cam_arg

    if sys.argv[1] == "--webcam":
        run_webcam(mode=mode, camera_id=cam_source, tts=use_tts)
    else:
        out = process_video(sys.argv[1], mode=mode, tts=use_tts)
        if out:
            print(f"Output: {out}")
pose_landmarker_heavy.task ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64437af838a65d18e5ba7a0d39b465540069bc8aae8308de3e318aad31fcbc7b
3
+ size 30664242
renderers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
renderers/blind_nav.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """2-panel blind navigation renderer (live feed + terrain map)."""
2
+ import cv2
3
+ import numpy as np
4
+ import math
5
+ from config import LM
6
+
7
+
8
def render(frame, obstacles, slope_angle, slope_dir, terrain, depth_norm, guidance):
    """Compose the 2-panel blind-navigation view.

    Left panel: the live frame with tinted obstacle boxes, labels and
    direction arrows. Right panel: the colour-mapped depth image with a
    tinted lower band, a slope arrow and terrain text. A 70-pixel bar
    underneath shows the voice message, the step hint and a risk gauge.
    Returns a new image of size ``(h + 70, 2 * w)``; ``frame`` is not modified.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)
    black = (0, 0, 0)

    h, w = frame.shape[:2]
    bar_h = 70
    canvas = np.zeros((h + bar_h, w * 2, 3), dtype=np.uint8)

    # ---- Left panel: live feed + obstacle highlights ----
    left = frame.copy()
    for ob in obstacles:
        x1, y1, x2, y2 = ob['box']
        nearness = ob['proximity']
        # Colour and blend weights by proximity band
        if nearness > 0.7:
            color, fill_weight, base_weight = (0, 0, 180), 0.35, 0.65
        elif nearness > 0.4:
            color, fill_weight, base_weight = (0, 160, 200), 0.2, 0.8
        else:
            color, fill_weight, base_weight = (0, 160, 0), 0.2, 0.8

        tinted = left.copy()
        cv2.rectangle(tinted, (x1, y1), (x2, y2), color, -1)
        left = cv2.addWeighted(tinted, fill_weight, left, base_weight, 0)
        cv2.rectangle(left, (x1, y1), (x2, y2), color, 3)

        # Label on a filled background just above the box
        label = f"{ob['label']} {ob['dist']}"
        (text_w, text_h), _ = cv2.getTextSize(label, font, 0.55, 2)
        cv2.rectangle(left, (x1, y1 - text_h - 8), (x1 + text_w + 4, y1), color, -1)
        cv2.putText(left, label, (x1 + 2, y1 - 5), font, 0.55, white, 2)

        # Direction arrow: horizontal for side obstacles, red vertical for centre
        cx, cy = ob['center']
        heading = ob['direction']
        if heading == "CENTER":
            cv2.arrowedLine(left, (cx, cy - 40), (cx, cy + 40), (0, 0, 255), 3, tipLength=0.4)
        else:
            tip_x = cx - 50 if heading == "LEFT" else cx + 50
            cv2.arrowedLine(left, (cx, cy), (tip_x, cy), white, 3, tipLength=0.4)

    count = len(obstacles)
    if count == 0:
        count_color = (0, 255, 0)
    elif count <= 2:
        count_color = (0, 200, 255)
    else:
        count_color = (0, 0, 255)
    cv2.rectangle(left, (0, 0), (w, 28), black, -1)
    cv2.putText(left, f"LIVE FEED | Obstacles: {len(obstacles)}", (8, 20),
                font, 0.5, count_color, 1)
    canvas[:h, :w] = left

    # ---- Right panel: depth + slope ----
    depth_color = cv2.applyColorMap((depth_norm * 255).astype(np.uint8), cv2.COLORMAP_INFERNO)
    right = depth_color.copy()
    steepness = abs(slope_angle)
    if steepness > 15:
        slope_tint = (0, 0, 200)
    elif steepness > 5:
        slope_tint = (0, 180, 220)
    else:
        slope_tint = (0, 180, 0)

    # Tint the lower band of the panel (from 55% of the height down)
    band_top = int(h * 0.55)
    band = right.copy()
    cv2.rectangle(band, (0, band_top), (w, h), slope_tint, -1)
    right = cv2.addWeighted(band, 0.25, right, 0.75, 0)
    cv2.line(right, (0, band_top), (w, band_top), (0, 255, 0), 1)

    # Slope arrow, rotated by the slope angle around the band centre
    origin_x, origin_y = w // 2, (band_top + h) // 2
    theta = math.radians(slope_angle)
    tip = (int(origin_x + math.cos(theta) * 50), int(origin_y - math.sin(theta) * 50))
    cv2.arrowedLine(right, (origin_x, origin_y), tip, (0, 255, 0), 3, tipLength=0.3)

    cv2.rectangle(right, (0, 0), (w, 80), black, -1)
    cv2.putText(right, f"TERRAIN: {slope_dir} {slope_angle:.1f} deg", (8, 20),
                font, 0.5, slope_tint, 2)
    cv2.putText(right, f"Surface: {terrain}", (8, 42),
                font, 0.45, (200, 200, 200), 1)

    if steepness > 3:
        advice = f"Foot adjust: {guidance['foot_adj']:+.0f} deg | Knee: {guidance['knee_rec']} deg"
        cv2.putText(right, advice, (8, 65), font, 0.4, (0, 255, 255), 1)
    else:
        cv2.putText(right, "Foot: level | Knee: normal", (8, 65), font, 0.4, (0, 200, 0), 1)
    canvas[:h, w:] = right

    # ---- Bottom voice bar ----
    cv2.rectangle(canvas, (0, h), (w * 2, h + bar_h), (25, 25, 35), -1)
    voice = guidance['voice']
    cv2.putText(canvas, "VOICE:", (10, h + 22), font, 0.45, (100, 180, 255), 1)
    cv2.putText(canvas, voice[:100], (75, h + 22), font, 0.42, white, 1)

    step = guidance['step']
    if step == "NORMAL":
        step_color = (0, 255, 0)
    elif step == "MEDIUM":
        step_color = (0, 200, 255)
    else:
        step_color = (0, 100, 255)
    cv2.putText(canvas, f"Step: {step}", (w * 2 - 300, h + 22), font, 0.5, step_color, 2)

    # Risk gauge: 80-pixel track filled in proportion to the score (capped at 100)
    risk = guidance['risk_score']
    if risk < 30:
        gauge_color = (0, 200, 0)
    elif risk < 60:
        gauge_color = (0, 200, 200)
    else:
        gauge_color = (0, 0, 255)
    gauge_x = w * 2 - 130
    cv2.putText(canvas, "Risk:", (gauge_x, h + 22), font, 0.4, (180, 180, 180), 1)
    cv2.rectangle(canvas, (gauge_x + 40, h + 10), (gauge_x + 120, h + 25), (60, 60, 60), -1)
    fill = int(80 * min(1.0, risk / 100.0))
    cv2.rectangle(canvas, (gauge_x + 40, h + 10), (gauge_x + 40 + fill, h + 25), gauge_color, -1)

    return canvas
renderers/overlay.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Simple HUD overlay renderer — single frame with annotations."""
2
+ import cv2
3
+ import math
4
+ import numpy as np
5
+ from config import LM, SKELETON_CONNS
6
+
7
+
8
def draw_obstacles(frame, obstacles):
    """Draw every obstacle onto ``frame`` in place.

    Each obstacle gets a semi-transparent fill, a thick border, a label
    (prefixed with ``#track_id`` when present) and an arrow for its
    direction. The colour blends from green to red as ``proximity`` rises.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    white = (255, 255, 255)

    for ob in obstacles:
        x1, y1, x2, y2 = ob['box']
        nearness = ob['proximity']
        color = (0, int(255 * (1 - nearness)), int(255 * nearness))

        # Semi-transparent fill, blended back into the frame itself
        alpha = 0.35 if nearness > 0.7 else 0.2
        filled = frame.copy()
        cv2.rectangle(filled, (x1, y1), (x2, y2), color, -1)
        cv2.addWeighted(filled, alpha, frame, 1 - alpha, 0, frame)

        # Thick border
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3)

        # Label with background
        caption = f"{ob['label']} {ob['dist']}"
        if 'track_id' in ob:
            caption = f"#{ob['track_id']} {caption}"
        (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.55, 2)
        cv2.rectangle(frame, (x1, y1 - text_h - 8), (x1 + text_w + 4, y1), color, -1)
        cv2.putText(frame, caption, (x1 + 2, y1 - 5), font, 0.55, white, 2)

        # Direction arrow: horizontal towards the obstacle's side, or a red
        # downward arrow when it is neither LEFT nor RIGHT
        cx, cy = ob['center']
        heading = ob['direction']
        if heading == "LEFT" or heading == "RIGHT":
            reach = -40 if heading == "LEFT" else 40
            cv2.arrowedLine(frame, (cx - reach, cy), (cx + reach, cy), white, 3, tipLength=0.4)
        else:
            cv2.arrowedLine(frame, (cx, cy - 40), (cx, cy + 40), (0, 0, 255), 3, tipLength=0.4)
41
+
42
+
43
def draw_skeleton(frame, landmarks, gait, w, h):
    """Draw the pose skeleton, joint dots and knee angles onto ``frame``.

    Landmark coordinates are scaled by ``w`` and ``h`` to pixels. Nothing is
    drawn when ``landmarks`` is empty; knee-angle labels are added only when
    ``gait`` is provided.
    """
    if not landmarks:
        return

    def _pixel(index):
        """Scale a landmark's x/y by the frame size to integer pixels."""
        point = landmarks[index]
        return int(point.x * w), int(point.y * h)

    # Bones
    for start, end in SKELETON_CONNS:
        cv2.line(frame, _pixel(start), _pixel(end), (0, 255, 0), 2)

    # Joints
    for index in LM.values():
        cv2.circle(frame, _pixel(index), 4, (0, 0, 255), -1)

    if not gait:
        return

    # Knee angle next to each knee, one colour per side
    for side, text_color in (('L', (0, 255, 255)), ('R', (255, 255, 0))):
        knee_x, knee_y = _pixel(LM[f'{side}_KNEE'])
        cv2.putText(frame, f"{gait[f'{side}_knee']:.0f}°", (knee_x + 5, knee_y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, text_color, 1)
59
+
60
+
61
def draw_hud(frame, risk_result, guidance):
    """Draw the 70-pixel status strip at the top of ``frame`` in place.

    Shows the risk badge with score, the terrain slope (plus trend when it
    is not STABLE), the gait summary, the non-zero risk components and the
    first 80 characters of the voice message.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    h, w = frame.shape[:2]
    cv2.rectangle(frame, (0, 0), (w, 70), (0, 0, 0), -1)

    # Risk badge, coloured by level
    level = risk_result['risk']
    badge_colors = {
        'SAFE': (0, 180, 0),
        'LOW': (0, 220, 0),
        'MEDIUM': (0, 180, 220),
        'HIGH': (0, 0, 220),
    }
    badge = badge_colors[level]
    cv2.rectangle(frame, (5, 5), (170, 35), badge, -1)
    cv2.putText(frame, f"RISK: {level} ({risk_result['score']})", (10, 28),
                font, 0.55, (255, 255, 255), 2)

    # Slope read-out, with the trend appended only when it is changing
    slope_text = f"Slope: {risk_result['terrain']} {risk_result['terrain_slope']:.1f}°"
    trend = guidance.get('slope_trend', 'STABLE') if guidance else 'STABLE'
    if trend != 'STABLE':
        slope_text += f" [{trend}]"
    cv2.putText(frame, slope_text, (180, 28), font, 0.5, (200, 200, 200), 1)

    summary = risk_result.get('gait_summary', {})
    if summary:
        gait_line = (f"Knee:{summary.get('knee', 0):.0f} Shin:{summary.get('shin', 0):.0f} "
                     f"Lean:{summary.get('lean', 0):.0f} Sym:{summary.get('symmetry', 0):.0f}")
        cv2.putText(frame, gait_line, (5, 50), font, 0.38, (180, 180, 180), 1)

    # Risk component breakdown: initial letter and value of each non-zero part
    components = risk_result.get('components', {})
    if components:
        parts = []
        for key, value in components.items():
            if value > 0:
                parts.append(f"{key[0].upper()}:{value}")
        if parts:
            cv2.putText(frame, " ".join(parts), (w - 200, 50), font, 0.3, badge, 1)

    if guidance:
        cv2.putText(frame, guidance['voice'][:80], (5, 68), font, 0.3, (100, 200, 255), 1)
93
+
94
+
95
def render_depth_mini(depth_norm, w, h):
    """Return a quarter-size INFERNO colour map of ``depth_norm``.

    ``depth_norm`` is scaled by 255 and cast to uint8 before colouring
    (presumably values lie in [0, 1] — confirm against the depth module).
    """
    as_bytes = (depth_norm * 255).astype(np.uint8)
    colored = cv2.applyColorMap(as_bytes, cv2.COLORMAP_INFERNO)
    mini_size = (w // 4, h // 4)
    return cv2.resize(colored, mini_size)
98
+
99
+
100
def render(frame, obstacles, gait, landmarks, risk_result, guidance, depth_mini=None):
    """Return a copy of ``frame`` annotated with obstacles, skeleton and HUD.

    When ``depth_mini`` is given it is pasted into the bottom-right corner
    with a 5-pixel margin. The input ``frame`` is left untouched.
    """
    h, w = frame.shape[:2]
    annotated = frame.copy()

    draw_obstacles(annotated, obstacles)
    draw_skeleton(annotated, landmarks, gait, w, h)
    draw_hud(annotated, risk_result, guidance)

    if depth_mini is None:
        return annotated

    mini_h, mini_w = depth_mini.shape[:2]
    bottom, right = h - 5, w - 5
    annotated[bottom - mini_h:bottom, right - mini_w:right] = depth_mini
    return annotated
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ultralytics>=8.0
2
+ transformers>=4.30
3
+ torch>=2.0
4
+ mediapipe>=0.10
5
+ opencv-python-headless>=4.8
6
+ gradio>=4.0
7
+ numpy>=1.24
8
+ Pillow>=9.0
9
+ edge-tts>=6.0
10
+ lapx>=0.5
yolov8n.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
3
+ size 6549796