1. GestureLine與GestureData類別
這些類別將用於存儲手寫的軌跡點和數據。
/// <summary>
/// A single stroke of a gesture: the list of points that make up the stroke
/// plus a flag marking the stroke as closed.
/// </summary>
[System.Serializable]
public class GestureLine
{
    // Points that make up this stroke.
    public List<Vector2> points = new List<Vector2>();

    // True when the stroke is to be treated as a closed line.
    public bool closedLine;
}

/// <summary>
/// A complete gesture, stored as a list of strokes.
/// </summary>
[System.Serializable]
public class GestureData
{
    // All strokes of the gesture.
    public List<GestureLine> lines = new List<GestureLine>();

    /// <summary>
    /// The final element of <see cref="lines"/>. Indexing an empty list throws,
    /// so callers must add a line before reading this.
    /// </summary>
    public GestureLine LastLine
    {
        get
        {
            int lastIndex = lines.Count - 1;
            return lines[lastIndex];
        }
    }
}
GestureLine用於存儲單一筆畫的所有軌跡點,並包含一個bool值來表示該軌跡是否閉合。
GestureData則用於存儲多個GestureLine,代表完整的手寫數據。
2. GesturePattern類別
用於定義手寫模式。這個類別將包含手寫的識別標識符、手寫數據以及其他相關屬性。
/// <summary>
/// A gesture template stored as a ScriptableObject asset. The CreateAssetMenu
/// attribute registers it under the "GestureRecognizer/GesturePattern" menu entry.
/// </summary>
[CreateAssetMenu(menuName = "GestureRecognizer/GesturePattern")]
public class GesturePattern : ScriptableObject
{
    // Identifier of this pattern.
    public string id;

    // Stroke data that defines the pattern.
    public GestureData gesture;

    // When true, the recognizer compares the drawn lines only in their original
    // order instead of trying every ordering.
    public bool useLinesOrder;

    // Passed through to the scoring routine; presumably makes stroke direction
    // significant when matching (the scoring code is not shown here).
    public bool useLinesDirections;
}
在這個類別中,使用CreateAssetMenu特性來在Unity編輯器中創建新的手勢模式。
3. 正規化數據
為了確保手寫識別的一致性,需要對手寫數據進行正規化。這將幫助消除尺度變化對識別結果的影響。
/// <summary>
/// Returns a copy of <paramref name="data"/> whose points are expressed as
/// normalized coordinates inside the square computed by <see cref="CalcRect"/>.
/// The closedLine flag of every line is carried over unchanged.
/// </summary>
private GestureData NormalizeScale(GestureData data)
{
    // Square bounding area of the whole gesture.
    var rect = CalcRect(data);

    var result = new GestureData();
    foreach (var line in data.lines)
    {
        result.lines.Add(new GestureLine()
        {
            points = line.points.Select(e => Rect.PointToNormalized(rect, e)).ToList(),
            closedLine = line.closedLine
        });
    }
    return result;
}

/// <summary>
/// Computes a square that encloses every point of the gesture. The square is
/// centered on the axis-aligned bounding box of the points and its side equals
/// the larger of the box's width and height.
/// </summary>
/// <returns>
/// The enclosing square, or an empty <see cref="Rect"/> when the gesture holds
/// no points at all (no lines, or only empty lines).
/// </returns>
private Rect CalcRect(GestureData data)
{
    bool hasPoint = false;
    float minx = 0f, miny = 0f, maxx = 0f, maxy = 0f;

    for (int j = 0; j < data.lines.Count; j++)
    {
        var points = data.lines[j].points;
        for (int i = 0; i < points.Count; i++)
        {
            var p = points[i];
            if (!hasPoint)
            {
                // Seed the extents from the first point that actually exists
                // instead of assuming lines[0].points[0] is present.
                minx = maxx = p.x;
                miny = maxy = p.y;
                hasPoint = true;
                continue;
            }

            minx = Mathf.Min(minx, p.x);
            maxx = Mathf.Max(maxx, p.x);
            miny = Mathf.Min(miny, p.y);
            maxy = Mathf.Max(maxy, p.y);
        }
    }

    if (!hasPoint)
    {
        // Nothing to enclose; there are also no points to normalize against it.
        return new Rect();
    }

    Rect rect = Rect.MinMaxRect(minx, miny, maxx, maxy);

    // Expand the shorter side so the area is square and the aspect ratio of
    // the gesture is preserved when its points are normalized.
    float rectsize = Mathf.Max(rect.width, rect.height);
    rect = new Rect(rect.center - new Vector2(rectsize / 2, rectsize / 2), new Vector2(rectsize, rectsize));
    return rect;
}
4. 特徵提取
在手寫識別中,特徵提取是非常重要的一步,它有助於將手寫的抽象表示轉換為可比較的數值。在程式碼中,選擇了曲線、角度和點的位置作為特徵來進行手勢比較。
4.1 曲線特徵:
在特徵提取的第一步中,計算了每條手寫線的曲線特徵。曲線特徵反映了手寫在不同點上的彎曲程度。使用了一個稱為"曲率"的特徵來量化這種彎曲程度。具體而言,計算了每個點的曲率,並使用這些曲率值來比較手勢之間的相似性。
/// <summary>
/// Computes a turning angle, in degrees, for the points of a stroke. For an index
/// i the angle is the signed difference between the heading of
/// (points[i - step] -> points[i]) and the heading of (points[i] -> points[i + step]),
/// with step fixed at 10. The first and last 10 entries of the result are zeros.
/// </summary>
private List<float> CalcCurvature(List<Vector2> points)
{
    const int step = 10;
    var curvature = new List<float>();

    // Leading padding: these indices have no neighbour "step" positions behind them.
    curvature.AddRange(new float[step]);

    int upperBound = points.Count - step;
    for (int mid = step; mid < upperBound; mid++)
    {
        Vector2 before = points[mid - step];
        Vector2 here = points[mid];
        Vector2 after = points[mid + step];

        Vector2 incoming = here - before;
        Vector2 outgoing = after - here;

        float headingIn = Mathf.Atan2(incoming.y, incoming.x) * Mathf.Rad2Deg;
        float headingOut = Mathf.Atan2(outgoing.y, outgoing.x) * Mathf.Rad2Deg;

        curvature.Add(Mathf.DeltaAngle(headingIn, headingOut));
    }

    // Trailing padding: these indices have no neighbour "step" positions ahead of them.
    curvature.AddRange(new float[step]);

    return curvature;
}

/// <summary>
/// Sums, over the first points1.Count indices, the absolute difference between
/// the curvature values of the two strokes, each difference divided by 360.
/// </summary>
private float CalcCurvatureDistance(List<Vector2> points1, List<Vector2> points2)
{
    int limit = points1.Count;
    List<float> first = CalcCurvature(points1);
    List<float> second = CalcCurvature(points2);

    float total = 0;
    int index = 0;
    while (index < limit)
    {
        total += Mathf.Abs(first[index] - second[index]) / 360f;
        index++;
    }
    return total;
}
4.2 角度特徵:
除了曲線特徵外,還計算了每條手寫線的角度特徵。角度特徵描述了手寫在不同點上的方向變化情況。計算了每個點之間的角度變化,並將這些角度值用於手寫的比較。
/// <summary>
/// Computes, for every point index, the heading in degrees (0 to 360) of the
/// vector running from the point 10 positions earlier to the point 10 positions
/// later; both neighbour indices are clamped to the ends of the list.
/// </summary>
private List<float> CalcAngles(List<Vector2> points)
{
    const int step = 10;
    var headings = new List<float>();
    int lastIndex = points.Count - 1;

    for (int center = 0; center <= lastIndex; center++)
    {
        int fromIndex = Mathf.Max(center - step, 0);
        int toIndex = Mathf.Min(center + step, lastIndex);

        Vector2 direction = points[toIndex] - points[fromIndex];
        float heading = Mathf.Atan2(direction.y, direction.x) * Mathf.Rad2Deg;

        // Atan2 yields negative values for the lower half-plane; shift them up.
        headings.Add(heading < 0 ? heading + 360 : heading);
    }

    return headings;
}

/// <summary>
/// Sums, over the first points1.Count indices, the absolute shortest angular
/// difference between the headings of the two strokes, each divided by 360.
/// </summary>
private float CalcAngleDistance(List<Vector2> points1, List<Vector2> points2)
{
    int limit = points1.Count;
    List<float> first = CalcAngles(points1);
    List<float> second = CalcAngles(points2);

    float total = 0;
    int index = 0;
    while (index < limit)
    {
        float delta = Mathf.DeltaAngle(first[index], second[index]);
        total += Mathf.Abs(delta) / 360f;
        index++;
    }
    return total;
}
4.3 位置特徵:
最後,計算了每個點的位置特徵。位置特徵描述了手寫線上每個點相對於整個手寫範圍的位置。使用歐幾里德距離來測量兩條軌跡對應點之間的距離。為了讓橫向和縱向的距離有相同的影響、達到公平比較,將兩股(x、y)皆為1的等腰直角三角形的斜邊長根號2作為基礎單位,再將所有距離除以根號2,獲得一個合適的標準化距離。
/// <summary>
/// Sums the distance between corresponding points of the two strokes, each
/// distance divided by the square root of 2. Iterates over points1.Count indices.
/// </summary>
private float CalcPositionDistance(List<Vector2> points1, List<Vector2> points2)
{
    float unit = Mathf.Sqrt(2);
    float total = 0;

    int count = points1.Count;
    int index = 0;
    while (index < count)
    {
        float separation = Vector2.Distance(points1[index], points2[index]);
        total += separation / unit;
        index++;
    }

    return total;
}
5. 識別過程
在手寫識別部分,使用提取的特徵來進行手寫的比較,以確定兩個手寫軌跡是否相似。通過計算每對手寫之間的特徵距離,然後使用一個綜合得分來評估它們的相似性。
5.1 計算特徵距離:
使用曲線特徵、角度特徵和位置特徵來計算特徵距離。對於每對手寫軌跡,計算這些特徵的差異,並將它們結合成一個綜合得分。
/// <summary>
/// Scores two point lists against each other, choosing the circular comparison
/// when the second list belongs to a closed line and the linear one otherwise.
/// </summary>
private Score CalcListScore(List<Vector2> points1, List<Vector2> points2, bool points2IsClosed)
{
    if (points2IsClosed)
    {
        return CalcCircularListScore(points1, points2);
    }
    else
    {
        return CalcLinearListScore(points1, points2);
    }
}

/// <summary>
/// Builds a Score from the position, curvature and angle distances of the two
/// point lists, compared index by index.
/// </summary>
private Score CalcLinearListScore(List<Vector2> points1, List<Vector2> points2)
{
    float posDist = CalcPositionDistance(points1, points2);
    float curvDist = CalcCurvatureDistance(points1, points2);
    float angleDist = CalcAngleDistance(points1, points2);
    return new Score()
    {
        positionDistance = posDist,
        curvatureDistance = curvDist,
        angleDistance = angleDist
    };
}

// Curvature feature. Same definition as the CalcCurvatureDistance listing in
// section 4.1, repeated here for reference; the misspelled "pivate" modifier
// is corrected to "private".
/// <summary>
/// Sums, over the first points1.Count indices, the absolute difference between
/// the curvature values of the two strokes, each difference divided by 360.
/// </summary>
private float CalcCurvatureDistance(List<Vector2> points1, List<Vector2> points2)
{
    int n = points1.Count;
    var curv1 = CalcCurvature(points1);
    var curv2 = CalcCurvature(points2);
    float sumCurvDistance = 0;
    for (int i = 0; i < n; i++)
    {
        float dif = Mathf.Abs(curv1[i] - curv2[i]) / 360f;
        sumCurvDistance += dif;
    }
    return sumCurvDistance;
}
5.2 評估手寫相似性:
根據計算得到的特徵距離,計算手寫的綜合得分。使用一組權重來結合不同特徵的距離,並得到最終的相似性得分。
每個特徵的分數都乘以一個權重(位置為4,曲線與角度各為1),然後將這些加權分數加總。最後,將這個總和除以權重總和6,以確保最終的分數在0到1之間。
/// <summary>
/// Holds the three feature distances between two gestures and derives a single
/// similarity value from them. Comparison operators order Score values by that
/// derived value, so a "greater" Score means a closer match.
/// </summary>
public struct Score
{
    public float positionDistance;
    public float curvatureDistance;
    public float angleDistance;

    /// <summary>
    /// Combined similarity in the range 0 to 1. Each distance is mapped to
    /// 1 - distance / 50 and clamped to 0..1; the position term is weighted 4,
    /// curvature and angle 1 each, and the weighted sum is divided by 6.
    /// </summary>
    public float score
    {
        get
        {
            float fromPosition = ToUnitScore(positionDistance);
            float fromCurvature = ToUnitScore(curvatureDistance);
            float fromAngle = ToUnitScore(angleDistance);

            float weighted = 4 * fromPosition + 1 * fromCurvature + 1 * fromAngle;
            return Mathf.Clamp01(weighted / 6);
        }
    }

    // Maps a distance to a 0..1 value; distances of 50 or more give 0.
    private static float ToUnitScore(float distance)
    {
        return Mathf.Clamp01(1f - distance / 50);
    }

    /// <summary>Sets all three distances to float.MaxValue.</summary>
    public void InitMax()
    {
        positionDistance = float.MaxValue;
        curvatureDistance = float.MaxValue;
        angleDistance = float.MaxValue;
    }

    /// <summary>A Score whose three distances are all float.MaxValue.</summary>
    public static Score MaxDistance
    {
        get
        {
            Score worst = new Score();
            worst.InitMax();
            return worst;
        }
    }

    public static bool operator >(Score s1, Score s2)
    {
        return s1.score > s2.score;
    }

    public static bool operator <(Score s1, Score s2)
    {
        return s1.score < s2.score;
    }

    public static bool operator >=(Score s1, Score s2)
    {
        return s1.score >= s2.score;
    }

    public static bool operator <=(Score s1, Score s2)
    {
        return s1.score <= s2.score;
    }
}
5.3 手寫識別過程
最終,通過比較不同手勢的得分來識別手勢。可分出最多4個執行緒下去做手寫軌跡比對,將待識別的手寫與預定義的手勢模式進行比較,並選擇得分最高的模式作為識別結果。
/// <summary>
/// Normalizes <paramref name="data"/>, searches the stored patterns for the best
/// match and returns it together with the elapsed time in seconds.
/// </summary>
/// <param name="data">Gesture to recognize.</param>
/// <param name="normalizeScale">When true, gestures are rescaled before comparison.</param>
public RecognitionResult Recognize(GestureData data, bool normalizeScale = true)
{
    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    var normData = NormalizeData(data, normalizeScale);
    // Fixed: the method is declared as FindPattern; the original call used "findPattern".
    var found = FindPattern(normData, normalizeScale);

    timer.Stop();
    found.recognitionTime = (float)(timer.ElapsedMilliseconds / 1000.0);
    return found;
}

/// <summary>
/// Runs the normalization pipeline: closed-line handling, optional scale
/// normalization, then even redistribution of points using Detail as the
/// segment count.
/// </summary>
private GestureData NormalizeData(GestureData data, bool normalizeScale)
{
    if (normalizeScale)
    {
        return NormalizeDistribution(NormalizeScale(NormalizeClosedLines(data)), Detail);
    }
    else
    {
        return NormalizeDistribution(NormalizeClosedLines(data), Detail);
    }
}

/// <summary>
/// Copies the gesture and, for every closed line, appends one extra point that
/// lies 99% of the way from the line's last point toward its first point, so
/// the stroke ends next to where it started.
/// </summary>
private GestureData NormalizeClosedLines(GestureData data)
{
    var result = new GestureData();
    foreach (var line in data.lines)
    {
        var resultLine = new GestureLine()
        {
            points = line.points.ToList(),
            closedLine = line.closedLine
        };
        if (line.closedLine)
        {
            resultLine.points.Add(Vector2.Lerp(resultLine.points.Last(), resultLine.points[0], 0.99f));
        }
        result.lines.Add(resultLine);
    }
    return result;
}

/// <summary>
/// Resamples every line of the gesture so that each has n + 1 points spaced
/// evenly along the length of the stroke.
/// </summary>
private GestureData NormalizeDistribution(GestureData data, int n)
{
    var result = new GestureData();
    foreach (var line in data.lines)
    {
        result.lines.Add(new GestureLine()
        {
            points = NormalizeDistribution(line.points, n),
            closedLine = line.closedLine
        });
    }
    return result;
}

/// <summary>
/// Resamples one path into n + 1 points placed at equal fractions (0, 1/n, ..., 1)
/// of the path's total length.
/// </summary>
private List<Vector2> NormalizeDistribution(List<Vector2> path, int n)
{
    // Cumulative length along the path at each original point.
    List<float> realPos = new List<float>();
    realPos.Add(0);
    for (int i = 1; i < path.Count; i++)
    {
        var v1 = path[i - 1];
        var v2 = path[i];
        realPos.Add(realPos[i - 1] + Vector2.Distance(v1, v2));
    }

    // Same positions expressed as a fraction of the total length.
    float totalDist = realPos.Last();
    var normPos = realPos.Select(e => e / totalDist).ToList();

    var result = new List<Vector2>();
    for (int ti = 0; ti <= n; ti++)
    {
        float t = (float)ti / n;
        result.Add(FindByNormalized(path, normPos, t));
    }
    return result;
}

/// <summary>
/// Finds the point at fraction <paramref name="t"/> along the path by locating
/// the segment whose normalized range contains t and interpolating inside it.
/// Falls back to the last point (t above 0.5) or the first point when no
/// segment matches.
/// </summary>
private Vector2 FindByNormalized(List<Vector2> vs, List<float> ts, float t)
{
    for (int i = 0; i < ts.Count - 1; i++)
    {
        var t1 = ts[i];
        var t2 = ts[i + 1];
        if (t1 <= t && t <= t2)
        {
            var v1 = vs[i];
            var v2 = vs[i + 1];
            float tt = Mathf.InverseLerp(t1, t2, t);
            return Vector2.Lerp(v1, v2, tt);
        }
    }
    return t > 0.5f ? vs[vs.Count - 1] : vs[0];
}

/// <summary>
/// Finds the stored pattern that best matches <paramref name="queryData"/>.
/// The pattern list is split into contiguous index ranges, one per worker
/// thread; each thread reports the best match in its range and the overall
/// best is returned.
/// </summary>
private RecognitionResult FindPattern(GestureData queryData, bool normalizeScale)
{
    var bestGesture = default(GesturePattern);
    var bestScore = Score.MaxDistance;

    // Every ordering of the query's lines; the first entry is the original order.
    var indexes = Enumerable.Range(0, queryData.lines.Count).ToList();
    List<List<int>> permutIndexes = GenPermutations(indexes);
    var permutations = permutIndexes.Select(e => MakePermutation(e, queryData)).ToList();
    var singlePermutation = permutations.GetRange(0, 1);

    int n_threads = Mathf.Min(this.numberOfThreads, patterns.Count);
    var threads = new List<Thread>();

    // Private gate for merging per-thread results. Locking on "this" would let
    // unrelated code that locks the same instance block or deadlock the workers.
    var bestResultGate = new object();

    for (int threadIndex = 0; threadIndex < n_threads; threadIndex++)
    {
        int beginIndex = threadIndex * patterns.Count / n_threads;
        int endIndex = (threadIndex + 1) * patterns.Count / n_threads - 1;
        threads.Add(new Thread(() =>
        {
            var result = SearchThroughPatterns(beginIndex, endIndex, queryData, normalizeScale, permutations, singlePermutation);
            lock (bestResultGate)
            {
                if (result.score > bestScore)
                {
                    bestScore = result.score;
                    bestGesture = result.gesture;
                }
            }
        }));
    }

    for (int i = 0; i < threads.Count; i++)
    {
        threads[i].Start();
    }
    for (int i = 0; i < threads.Count; i++)
    {
        threads[i].Join();
    }

    return new RecognitionResult() { gesture = bestGesture, score = bestScore };
}

/// <summary>
/// Builds a GestureData whose lines are the lines of <paramref name="data"/>
/// picked in the order given by <paramref name="indexes"/>.
/// </summary>
// Fixed: renamed from "makePermutation" so it matches the call in FindPattern.
private GestureData MakePermutation(List<int> indexes, GestureData data)
{
    return new GestureData()
    {
        lines = indexes.Select(e => data.lines[e]).ToList()
    };
}

/// <summary>
/// Recursively generates every ordering of the elements of <paramref name="list"/>
/// from position <paramref name="low"/> onward. The list is permuted in place
/// while recursing and restored before returning; each result is a copy.
/// The first result is always the list in its incoming order.
/// </summary>
private static List<List<int>> GenPermutations(List<int> list, int low = 0)
{
    System.Action<int, int> swap = (int a, int b) =>
    {
        var temp = list[a];
        list[a] = list[b];
        list[b] = temp;
    };

    var result = new List<List<int>>();
    if (low + 1 >= list.Count)
    {
        // At most one element left to place: the current arrangement is final.
        result.Add(new List<int>(list));
    }
    else
    {
        // Keep the element at "low" where it is.
        foreach (var p in GenPermutations(list, low + 1))
        {
            result.Add(new List<int>(p));
        }

        // Then try every later element in position "low".
        for (int i = low + 1; i < list.Count; i++)
        {
            swap(low, i);
            foreach (var p in GenPermutations(list, low + 1))
            {
                result.Add(new List<int>(p));
            }
            swap(low, i); // undo so the caller sees the list unchanged
        }
    }
    return result;
}

/// <summary>
/// Compares the query against patterns[beginIndex..endIndex] (inclusive) and
/// returns the best-scoring pattern in that range. Patterns whose line count
/// differs from the query's are skipped.
/// </summary>
private RecognitionResult SearchThroughPatterns(int beginIndex, int endIndex, GestureData queryData, bool normalizeScale, List<GestureData> permutations, List<GestureData> singlePermutation)
{
    var bestGesture = default(GesturePattern);
    var bestScore = Score.MaxDistance;

    for (int i = beginIndex; i <= endIndex; i++)
    {
        var gestureAsset = patterns[i];
        var assetData = NormalizeData(gestureAsset.gesture, normalizeScale);

        if (assetData.lines.Count != queryData.lines.Count)
        {
            // Ignore templates with a different number of lines.
            continue;
        }

        // When the pattern requires the drawn line order, only the original
        // ordering of the query is compared.
        var permutationsToLook = gestureAsset.useLinesOrder ? singlePermutation : permutations;

        foreach (var data in permutationsToLook)
        {
            var permutScore = CalcScore(data, assetData, gestureAsset.useLinesDirections);
            if (permutScore > bestScore)
            {
                bestScore = permutScore;
                bestGesture = gestureAsset;
            }
        }
    }

    // The listing was cut off before this point; the result has the same shape
    // that FindPattern reads (gesture and score).
    return new RecognitionResult() { gesture = bestGesture, score = bestScore };
}
6. 實際應用
最後,我們將手勢識別應用於實際場景中。我們創建DrawDetector類別來監聽用戶的手勢輸入並進行識別。
/// <summary>
/// Collects drag input into gesture lines, mirrors them onto UILineRenderer
/// instances, and on drag end runs the recognizer on the most recent lines,
/// raising <see cref="OnRecognize"/> with the outcome.
/// </summary>
public class DrawDetector : MonoBehaviour, IBeginDragHandler, IDragHandler, IEndDragHandler, IPointerClickHandler
{
    public Recognizer recognizer;

    // Template renderer; additional renderers are instantiated from it as needed.
    public UILineRenderer line;
    private List<UILineRenderer> lines;

    // Minimum combined score a result needs to be reported as recognized.
    [Range(0f, 1f)]
    public float scoreToAccept = 0.8f;

    [Range(1, 10)]
    public int minLines = 1;
    public int MinLines { set { minLines = Mathf.Clamp(value, 1, 10); } }

    [Range(1, 10)]
    public int maxLines = 2;
    public int MaxLines { set { maxLines = Mathf.Clamp(value, 1, 10); } }

    // What to drop when a new stroke starts and maxLines is already reached.
    public enum RemoveStrategy
    {
        RemoveOld,
        ClearAll
    }

    public RemoveStrategy removeStrategy;

    // When true, a successful recognition keeps only the lines that matched.
    public bool clearNotRecognizedLines;

    // When true, points are normalized against this object's rect and the
    // recognizer's own scale normalization is turned off.
    public bool fixedArea = false;

    GestureData data = new GestureData();

    [System.Serializable]
    public class ResultEvent : UnityEvent<RecognitionResult>
    {
    }

    public ResultEvent OnRecognize;

    RectTransform rectTransform;

    void Start()
    {
        line.relativeSize = true;
        line.LineList = false;
        lines = new List<UILineRenderer>() { line };
        rectTransform = transform as RectTransform;
        UpdateLines();
    }

    void OnValidate()
    {
        // Keep maxLines from dropping below minLines.
        maxLines = Mathf.Max(minLines, maxLines);
    }

    /// <summary>
    /// Synchronizes the line renderers with the current gesture data: creates
    /// renderers when there are more lines than renderers, clears all of them,
    /// then assigns each line's points to its renderer.
    /// </summary>
    public void UpdateLines()
    {
        while (lines.Count < data.lines.Count)
        {
            var newLine = Instantiate(line, line.transform.parent);
            lines.Add(newLine);
        }

        for (int i = 0; i < lines.Count; i++)
        {
            lines[i].Points = new Vector2[] { };
            lines[i].SetAllDirty();
        }

        int n = Mathf.Min(lines.Count, data.lines.Count);
        for (int i = 0; i < n; i++)
        {
            lines[i].Points = data.lines[i].points.Select(p => RealToLine(p)).ToArray();
            lines[i].SetAllDirty();
        }
    }

    // Converts an input position to coordinates normalized to this object's rect.
    Vector2 RealToLine(Vector2 position)
    {
        var local = rectTransform.InverseTransformPoint(position);
        var normalized = Rect.PointToNormalized(rectTransform.rect, local);
        return normalized;
    }

    // Currently returns the position unchanged.
    Vector2 FixedPosition(Vector2 position)
    {
        return position;
    }

    /// <summary>Removes every recorded line and refreshes the renderers.</summary>
    public void ClearLines()
    {
        data.lines.Clear();
        UpdateLines();
    }

    public void OnPointerClick(PointerEventData eventData)
    {
    }

    public void OnBeginDrag(PointerEventData eventData)
    {
        if (data.lines.Count >= maxLines)
        {
            switch (removeStrategy)
            {
                case RemoveStrategy.RemoveOld:
                    data.lines.RemoveAt(0);
                    break;
                case RemoveStrategy.ClearAll:
                    data.lines.Clear();
                    break;
            }
        }

        data.lines.Add(new GestureLine());
        AppendPointToLastLine(eventData.position);
    }

    public void OnDrag(PointerEventData eventData)
    {
        AppendPointToLastLine(eventData.position);
    }

    // Adds the position to the current line unless it repeats the line's last
    // point, then refreshes the renderers. Shared by OnBeginDrag and OnDrag.
    void AppendPointToLastLine(Vector2 position)
    {
        var fixedPos = FixedPosition(position);
        var points = data.LastLine.points;
        if (points.Count == 0 || points.Last() != fixedPos)
        {
            points.Add(fixedPos);
            UpdateLines();
        }
    }

    public void OnEndDrag(PointerEventData eventData)
    {
        StartCoroutine(OnEndDragCoroutine(eventData));
    }

    /// <summary>
    /// Finishes the current line, then tries to recognize the last "size" lines,
    /// starting with all of them and shrinking down to minLines. Each attempt
    /// runs on a separate thread while the coroutine yields. The first accepted
    /// result is reported and ends the search; every rejected attempt reports
    /// RecognitionResult.Empty.
    /// </summary>
    IEnumerator OnEndDragCoroutine(PointerEventData eventData)
    {
        data.LastLine.points.Add(FixedPosition(eventData.position));
        UpdateLines();

        for (int size = data.lines.Count; size >= 1 && size >= minLines; size--)
        {
            // The last [size] lines.
            var sizedData = new GestureData()
            {
                lines = data.lines.GetRange(data.lines.Count - size, size)
            };

            var sizedNormalizedData = sizedData;

            if (fixedArea)
            {
                var rect = this.rectTransform.rect;
                sizedNormalizedData = new GestureData()
                {
                    lines = sizedData.lines.Select(stroke => new GestureLine()
                    {
                        closedLine = stroke.closedLine,
                        points = stroke.points.Select(p => Rect.PointToNormalized(rect, this.rectTransform.InverseTransformPoint(p))).ToList()
                    }).ToList()
                };
            }

            RecognitionResult result = null;

            // Recognize the gesture formed by the last [size] lines.
            var thread = new System.Threading.Thread(() =>
            {
                result = recognizer.Recognize(sizedNormalizedData, normalizeScale: !fixedArea);
            });
            thread.Start();
            while (thread.IsAlive)
            {
                yield return null;
            }

            // result stays null if Recognize threw on the worker thread; treat
            // that as "not recognized" instead of dereferencing null here.
            if (result != null && result.gesture != null && result.score.score >= scoreToAccept)
            {
                OnRecognize.Invoke(result);
                if (clearNotRecognizedLines)
                {
                    data = sizedData;
                    UpdateLines();
                }
                break;
            }
            else
            {
                OnRecognize.Invoke(RecognitionResult.Empty);
            }
        }

        yield return null;
    }
}
7. 實作範例畫面
留言列表