- Posts: 2148
- Joined: Fri Nov 13, 2020 11:03 pm
- Real Name: Maxwell Allington
Here's the script (I just had to pip install mediapipe) :
- Code: Select all
import cv2
import mediapipe as mp
import json

# Track one face through a video with MediaPipe FaceMesh and dump, per video
# frame, a Position and a Rotation triplet (normalized [0..1] landmark
# coordinates) to a JSON file keyed by frame index.
mp_drawing = mp.solutions.drawing_utils
mp_face_mesh = mp.solutions.face_mesh

# Input video file and output JSON file.
video_file = 'face.mp4'
output_file = 'output.json'

# Open video file for reading.
cap = cv2.VideoCapture(video_file)

# Maps video frame index -> {'Position': {...}, 'Rotation': {...}}.
# Keyed by the *frame* index (not the detection count) so that frames where
# no face is found leave a gap instead of silently shifting every later
# keyframe — important when these keys become keyframe numbers downstream.
output_data = {}

# Load FaceMesh model.
with mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
) as face_mesh:
    # Custom connections for drawing the face landmarks.
    # NOTE(review): these pairs only cover landmark indices 0-88 of the 468
    # FaceMesh landmarks and don't follow any facial feature; consider
    # mp_face_mesh.FACEMESH_CONTOURS for a meaningful overlay.
    connections = [
        (0, 1), (1, 2), (2, 3), (3, 4), (5, 6), (6, 7), (7, 8),
        (9, 10), (10, 11), (11, 12), (13, 14), (14, 15), (15, 16),
        (17, 18), (18, 19), (19, 20), (21, 22), (22, 23), (23, 24),
        (25, 26), (26, 27), (27, 28), (29, 30), (30, 31), (31, 32),
        (33, 34), (34, 35), (35, 36), (37, 38), (38, 39), (39, 40),
        (41, 42), (42, 43), (43, 44), (45, 46), (46, 47), (47, 48),
        (49, 50), (50, 51), (51, 52), (53, 54), (54, 55), (55, 56),
        (57, 58), (58, 59), (59, 60), (61, 62), (62, 63), (63, 64),
        (65, 66), (66, 67), (67, 68), (69, 70), (70, 71), (71, 72),
        (73, 74), (74, 75), (75, 76), (77, 78), (78, 79), (79, 80),
        (81, 82), (82, 83), (83, 84), (85, 86), (86, 87), (87, 88),
    ]

    frame_index = 0
    try:
        # Process each frame in the video.
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                break

            # MediaPipe expects RGB; OpenCV decodes to BGR.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Use MediaPipe FaceMesh for face tracking.
            results = face_mesh.process(image_rgb)

            # Check if any face is detected (max_num_faces=1, so at most one).
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    lm = face_landmarks.landmark

                    # Position: landmark 0 (normalized image coordinates).
                    position_data = {'x': lm[0].x, 'y': lm[0].y, 'z': lm[0].z}

                    # BUG FIX: the original reused landmark[0] here, which made
                    # Rotation an exact duplicate of Position. Use a second
                    # reference landmark so the two channels carry independent
                    # data. NOTE(review): this is still a raw landmark, not an
                    # Euler angle — for true head rotation derive angles from
                    # several landmarks (e.g. with cv2.solvePnP). TODO confirm
                    # which landmark the originally-run script used.
                    rotation_data = {'x': lm[1].x, 'y': lm[1].y, 'z': lm[1].z}

                    output_data[frame_index] = {
                        'Position': position_data,
                        'Rotation': rotation_data,
                    }

                    # Display the tracked landmarks on the frame.
                    mp_drawing.draw_landmarks(
                        image,
                        face_landmarks,
                        connections,
                        mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=1, circle_radius=1),
                        mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1),
                    )

            # Show the image; 'q' aborts early (already-collected data is kept).
            cv2.imshow('3D Face Tracking', image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_index += 1
    finally:
        # Always release the capture and close the preview window, even if a
        # frame raised mid-loop.
        cap.release()
        cv2.destroyAllWindows()

# Write the output data to a JSON file (indented, matching the sample output).
with open(output_file, 'w') as f:
    json.dump(output_data, f, indent=2)
This creates a JSON file that looks like this: for each frame, it writes the x,y,z position and the x,y,z rotation (I had 330 frames, but I cut it at 5 just to show it here):
- Code: Select all
{
"0": {
"Position": {
"x": 0.4771953225135803,
"y": 0.4921644926071167,
"z": -0.01905098371207714
},
"Rotation": {
"x": 0.48883700370788576,
"y": 0.4551820158958435,
"z": -0.0358513742685318
}
},
"1": {
"Position": {
"x": 0.47622111439704897,
"y": 0.4933364987373352,
"z": -0.018306903541088105
},
"Rotation": {
"x": 0.48598533868789675,
"y": 0.454181969165802,
"z": -0.035381920635700229
}
},
"2": {
"Position": {
"x": 0.47509485483169558,
"y": 0.4971919357776642,
"z": -0.019016912207007409
},
"Rotation": {
"x": 0.4855861961841583,
"y": 0.4548364579677582,
"z": -0.03629734367132187
}
},
"3": {
"Position": {
"x": 0.4739174544811249,
"y": 0.5019863247871399,
"z": -0.018797742202878
},
"Rotation": {
"x": 0.4844766855239868,
"y": 0.4584697186946869,
"z": -0.03602452948689461
}
},
"4": {
"Position": {
"x": 0.47195130586624148,
"y": 0.5062845349311829,
"z": -0.01845380663871765
},
"Rotation": {
"x": 0.4835212528705597,
"y": 0.463336318731308,
"z": -0.03644793480634689
}
},
"5": {
"Position": {
"x": 0.4714433252811432,
"y": 0.511802077293396,
"z": -0.018018366768956186
},
But now I'm struggling to create a Transform3D node from it because I don't understand the structure and how to convert the raw coordinates to something readable by Fusion.
I was able to make one work by removing what I don't understand in the node.
Here an example of a regular Transform3D node (just to show its typical structure) :
- Code: Select all
{
Tools = ordered() {
Transform3D2 = Transform3D {
CtrlWZoom = false,
Inputs = {
["Transform3DOp.Translate.X"] = Input {
SourceOp = "Transform3D2XOffset",
Source = "Value",
},
["Transform3DOp.Translate.Y"] = Input {
SourceOp = "Transform3D2YOffset",
Source = "Value",
},
["Transform3DOp.Translate.Z"] = Input {
SourceOp = "Transform3D2ZOffset",
Source = "Value",
},
["Transform3DOp.Rotate.X"] = Input {
SourceOp = "Transform3D2XRotation",
Source = "Value",
},
["Transform3DOp.Rotate.Y"] = Input {
SourceOp = "Transform3D2YRotation",
Source = "Value",
},
["Transform3DOp.Rotate.Z"] = Input {
SourceOp = "Transform3D2ZRotation",
Source = "Value",
},
},
ViewInfo = OperatorInfo { Pos = { 1265, -214.5 } },
},
Transform3D2XOffset = BezierSpline {
SplineColor = { Red = 250, Green = 59, Blue = 49 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 0.115, RH = { 11, 0.088 }, Flags = { Linear = true } },
[31] = { 0.034, LH = { 21, 0.061 }, Flags = { Linear = true } }
}
},
Transform3D2YOffset = BezierSpline {
SplineColor = { Red = 252, Green = 206, Blue = 47 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 0.181, RH = { 11, 0.134 }, Flags = { Linear = true } },
[31] = { 0.04, LH = { 21, 0.087 }, Flags = { Linear = true } }
}
},
Transform3D2ZOffset = BezierSpline {
SplineColor = { Red = 254, Green = 131, Blue = 46 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 2.63, RH = { 11, 2.46 }, Flags = { Linear = true } },
[31] = { 2.12, LH = { 21, 2.29 }, Flags = { Linear = true } }
}
},
Transform3D2XRotation = BezierSpline {
SplineColor = { Red = 255, Green = 128, Blue = 128 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 70.9, RH = { 11, 57.5666666666667 }, Flags = { Linear = true } },
[31] = { 30.9, LH = { 21, 44.2333333333333 }, Flags = { Linear = true } }
}
},
Transform3D2YRotation = BezierSpline {
SplineColor = { Red = 128, Green = 255, Blue = 128 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 54.5, RH = { 11, 47.2333333333333 }, Flags = { Linear = true } },
[31] = { 32.7, LH = { 21, 39.9666666666667 }, Flags = { Linear = true } }
}
},
Transform3D2ZRotation = BezierSpline {
SplineColor = { Red = 128, Green = 128, Blue = 255 },
CtrlWZoom = false,
NameSet = true,
KeyFrames = {
[1] = { 47.3, RH = { 11, 40.0333333333333 }, Flags = { Linear = true } },
[31] = { 25.5, LH = { 21, 32.7666666666667 }, Flags = { Linear = true } }
}
}
},
ActiveTool = "Transform3D2"
}
I had a script that took the data from the JSON file and tried to make a Transform3D node from it, like what you see below...
I removed everything that I didn't know how to deal with, and the final result looks like this:
- Code: Select all
{
Tools = ordered() {
Transform3D1 = Transform3D {
CtrlWZoom = false,
Inputs = {
["Transform3DOp.Translate.X"] = Input {
SourceOp = "Transform3D1XOffset",
Source = "Value",
},
["Transform3DOp.Translate.Y"] = Input {
SourceOp = "Transform3D1YOffset",
Source = "Value",
},
["Transform3DOp.Translate.Z"] = Input {
SourceOp = "Transform3D1ZOffset",
Source = "Value",
},
["Transform3DOp.Rotate.X"] = Input {
SourceOp = "Transform3D1XRotation",
Source = "Value",
},
["Transform3DOp.Rotate.Y"] = Input {
SourceOp = "Transform3D1YRotation",
Source = "Value",
},
["Transform3DOp.Rotate.Z"] = Input {
SourceOp = "Transform3D1ZRotation",
Source = "Value",
},
},
ViewInfo = OperatorInfo { Pos = { 495, -148.5 } },
},
Transform3D1XOffset = BezierSpline {
SplineColor = { Red = 250, Green = 59, Blue = 49 },
KeyFrames = {
[0] = { 4.771953225135803 },
[1] = { 4.7622111439704895 },
[331] = { 4.792782664299011 }
}
},
Transform3D1YOffset = BezierSpline {
SplineColor = { Red = 252, Green = 206, Blue = 47 },
KeyFrames = {
[0] = { 4.921644926071167 },
[1] = { 4.933364987373352 },
[331] = { 4.9396127462387085 }
}
},
Transform3D1ZOffset = BezierSpline {
SplineColor = { Red = 254, Green = 131, Blue = 46 },
KeyFrames = {
[0] = { -0.1905098371207714 },
[1] = { -0.18306903541088104 },
[331] = { -0.19052352756261826 }
}
},
Transform3D1XRotation = BezierSpline {
SplineColor = { Red = 255, Green = 128, Blue = 128 },
KeyFrames = {
[0] = { 4.888370037078857 },
[1] = { 4.859853386878967 },
[331] = { 4.912715554237366 }
}
},
Transform3D1YRotation = BezierSpline {
SplineColor = { Red = 128, Green = 255, Blue = 128 },
KeyFrames = {
[0] = { 4.551820158958435 },
[1] = { 4.54181969165802 },
[331] = { 4.512617588043213 }
}
},
Transform3D1ZRotation = BezierSpline {
SplineColor = { Red = 128, Green = 128, Blue = 255 },
CtrlWZoom = false,
KeyFrames = {
[0] = { -0.358513742685318 },
[1] = { -0.35381920635700226 },
[331] = { -0.36199696362018585 }
}
}
}
}
(I shortened the code, but you get the point — it goes from 0 to 331.)
But since I certainly removed important parts, by pasting that into fusion, I can see the path, and the object connected to the Transform3d node, but it only moves on the X,Y plane (Translation).
I guess I'm close to get something working, but I'm stuck.
I imagine that removing the LH and RH (left handle and right handle) is messing with the final result...
I'm amazed how far I was able to go, because I only know the very basics of Python. I just use logic to hack things together.