close

1. GestureLine與GestureData類別

這些類別將用於存儲手寫的軌跡點和數據。

/// <summary>
/// One stroke of a gesture: the sampled points of the stroke plus a flag
/// saying whether the stroke is to be treated as a closed line.
/// </summary>
[System.Serializable]
public class GestureLine
{
    /// <summary>Sampled points of the stroke, in the order they were added.</summary>
    public List<Vector2> points = new List<Vector2>();

    /// <summary>True when the stroke is a closed line.</summary>
    public bool closedLine;
}

/// <summary>
/// A complete gesture, stored as an ordered collection of strokes.
/// </summary>
[System.Serializable]
public class GestureData
{
    /// <summary>All strokes that make up the gesture.</summary>
    public List<GestureLine> lines = new List<GestureLine>();

    /// <summary>
    /// The most recently added stroke. Throws if <see cref="lines"/> is empty,
    /// because the index evaluates to -1.
    /// </summary>
    public GestureLine LastLine
    {
        get
        {
            int lastIndex = lines.Count - 1;
            return lines[lastIndex];
        }
    }
}

GestureLine用於存儲單一筆劃(一條手寫軌跡)的所有軌跡點,並包含一個bool值來表示該軌跡是否閉合。

GestureData則用於存儲多個GestureLine,代表完整的手寫數據。

2. GesturePattern類別

用於定義手寫模式。這個類別將包含手寫的識別標識符、手寫數據以及其他相關屬性。

/// <summary>
/// A reference gesture stored as a ScriptableObject asset; it can be created
/// from the "GestureRecognizer/GesturePattern" asset menu entry.
/// </summary>
[CreateAssetMenu(menuName = "GestureRecognizer/GesturePattern")]
public class GesturePattern : ScriptableObject
{
    /// <summary>Identifier of the pattern.</summary>
    public string id;

    /// <summary>The strokes that define the pattern.</summary>
    public GestureData gesture;

    /// <summary>
    /// When true, the recognizer compares the drawn strokes only in their
    /// original order instead of trying every stroke ordering.
    /// </summary>
    public bool useLinesOrder;

    /// <summary>
    /// Forwarded to the score calculation; presumably makes the drawing
    /// direction of each stroke significant (confirm against CalcScore).
    /// </summary>
    public bool useLinesDirections;
}

在這個類別中,使用CreateAssetMenu特性來在Unity編輯器中創建新的手勢模式。

3. 正規化數據

為了確保手寫識別的一致性,需要對手寫數據進行正規化。這將幫助消除尺度變化對識別結果的影響。

/// <summary>
/// Returns a copy of <paramref name="data"/> in which every point has been
/// re-expressed relative to the square bounding box computed by CalcRect.
/// </summary>
/// <param name="data">Gesture to normalize; it is not modified.</param>
/// <returns>A new gesture with the same strokes and closed flags, with normalized points.</returns>
private GestureData NormalizeScale(GestureData data)
{
    // Square bounding box enclosing every point of the gesture.
    Rect bounds = CalcRect(data);

    // Project each stroke into a new stroke whose points are normalized to the box.
    List<GestureLine> scaledLines = data.lines
        .Select(stroke => new GestureLine
        {
            points = stroke.points.Select(p => Rect.PointToNormalized(bounds, p)).ToList(),
            closedLine = stroke.closedLine
        })
        .ToList();

    return new GestureData { lines = scaledLines };
}

//取得最大手寫軌跡範圍
/// <summary>
/// Computes a square rectangle, centered on the gesture's bounding box, whose
/// side equals the larger of the box's width and height.
/// </summary>
/// <param name="data">Gesture whose points are measured. The first stroke must contain at least one point.</param>
/// <returns>The square rectangle enclosing all points of the gesture.</returns>
private Rect CalcRect(GestureData data)
{
    // Seed the extremes with the very first point (throws if it does not exist).
    Vector2 seed = data.lines[0].points[0];
    float left = seed.x;
    float right = seed.x;
    float bottom = seed.y;
    float top = seed.y;

    foreach (GestureLine stroke in data.lines)
    {
        foreach (Vector2 p in stroke.points)
        {
            left = Mathf.Min(left, p.x);
            right = Mathf.Max(right, p.x);
            bottom = Mathf.Min(bottom, p.y);
            top = Mathf.Max(top, p.y);
        }
    }

    // Expand the tight box to a square around the same center.
    Rect tight = Rect.MinMaxRect(left, bottom, right, top);
    float side = Mathf.Max(tight.width, tight.height);
    Vector2 halfExtent = new Vector2(side / 2, side / 2);
    return new Rect(tight.center - halfExtent, new Vector2(side, side));
}

4. 特徵提取 

在手寫識別中,特徵提取是非常重要的一步,它有助於將手寫的抽象表示轉換為可比較的數值。在程式碼中,選擇了曲線、角度和點的位置作為特徵來進行手勢比較。

4.1 曲線特徵:

在特徵提取的第一步中,計算了每條手寫線的曲線特徵。曲線特徵反映了手寫在不同點上的彎曲程度。使用了一個稱為"曲率"的特徵來量化這種彎曲程度。具體而言,計算了每個點的曲率,並使用這些曲率值來比較手勢之間的相似性。

/// <summary>
/// Computes a per-point turning angle (in degrees) along a stroke. For each
/// interior index the angle is the signed difference between the heading of
/// the segment arriving from 10 samples back and the heading of the segment
/// leaving towards 10 samples ahead.
/// </summary>
/// <param name="points">Points of the stroke.</param>
/// <returns>
/// Ten leading zeros, one angle per interior index, then ten trailing zeros.
/// The list therefore always has at least 20 entries, even for shorter input.
/// </returns>
private List<float> CalcCurvature(List<Vector2> points)
{
    const int step = 10;
    var curvature = new List<float>();

    // Leading padding: these indices have no sample `step` positions behind them.
    for (int pad = 0; pad < step; pad++)
    {
        curvature.Add(0);
    }

    int interiorEnd = points.Count - step;
    for (int center = step; center < interiorEnd; center++)
    {
        Vector2 incoming = points[center] - points[center - step];
        Vector2 outgoing = points[center + step] - points[center];

        float headingIn = Mathf.Atan2(incoming.y, incoming.x) * Mathf.Rad2Deg;
        float headingOut = Mathf.Atan2(outgoing.y, outgoing.x) * Mathf.Rad2Deg;

        curvature.Add(Mathf.DeltaAngle(headingIn, headingOut));
    }

    // Trailing padding: these indices have no sample `step` positions ahead of them.
    for (int pad = 0; pad < step; pad++)
    {
        curvature.Add(0);
    }

    return curvature;
}

/// <summary>
/// Sums, over the indices of <paramref name="points1"/>, the absolute
/// difference between the curvature values of the two strokes, each
/// difference divided by 360.
/// </summary>
/// <param name="points1">First stroke; its point count decides how many indices are compared.</param>
/// <param name="points2">Second stroke.</param>
/// <returns>The accumulated curvature difference.</returns>
private float CalcCurvatureDistance(List<Vector2> points1, List<Vector2> points2)
{
    int count = points1.Count;

    List<float> firstCurvature = CalcCurvature(points1);
    List<float> secondCurvature = CalcCurvature(points2);

    float total = 0;
    for (int k = 0; k < count; k++)
    {
        total += Mathf.Abs(firstCurvature[k] - secondCurvature[k]) / 360f;
    }

    return total;
}

4.2 角度特徵:
除了曲線特徵外,還計算了每條手寫線的角度特徵。角度特徵描述了手寫在不同點上的方向變化情況。計算了每個點之間的角度變化,並將這些角度值用於手寫的比較。

/// <summary>
/// Computes, for every point of a stroke, the heading in degrees (0 to 360)
/// of the chord running from the sample 10 positions before it to the sample
/// 10 positions after it. Both ends are clamped to the stroke's bounds.
/// </summary>
/// <param name="points">Points of the stroke.</param>
/// <returns>One heading per input point.</returns>
private List<float> CalcAngles(List<Vector2> points)
{
    const int step = 10;
    int lastIndex = points.Count - 1;
    var headings = new List<float>();

    for (int center = 0; center <= lastIndex; center++)
    {
        Vector2 chordStart = points[Mathf.Max(center - step, 0)];
        Vector2 chordEnd = points[Mathf.Min(center + step, lastIndex)];
        Vector2 chord = chordEnd - chordStart;

        float heading = Mathf.Atan2(chord.y, chord.x) * Mathf.Rad2Deg;

        // Atan2 yields (-180, 180]; shift negatives so the heading is non-negative.
        headings.Add(heading < 0 ? heading + 360 : heading);
    }

    return headings;
}

/// <summary>
/// Sums, over the indices of <paramref name="points1"/>, the absolute
/// shortest angular difference between the headings of the two strokes,
/// each difference divided by 360.
/// </summary>
/// <param name="points1">First stroke; its point count decides how many indices are compared.</param>
/// <param name="points2">Second stroke.</param>
/// <returns>The accumulated heading difference.</returns>
private float CalcAngleDistance(List<Vector2> points1, List<Vector2> points2)
{
    int count = points1.Count;

    List<float> firstHeadings = CalcAngles(points1);
    List<float> secondHeadings = CalcAngles(points2);

    float total = 0;
    for (int k = 0; k < count; k++)
    {
        float delta = Mathf.DeltaAngle(firstHeadings[k], secondHeadings[k]);
        total += Mathf.Abs(delta) / 360f;
    }

    return total;
}

4.3 位置特徵:
最後,計算每個點的位置特徵。位置特徵描述了手寫線上每個點在手寫範圍中的位置,並使用歐幾里德距離來測量兩條軌跡對應點之間的距離。為了讓橫向與縱向的差異有相同的影響、達到公平比較,需要再對距離做標準化:軌跡點經正規化後落在邊長為1的正方形內,兩點間的最大距離即為其對角線——也就是兩股(x、y)皆為1的等腰直角三角形的斜邊,長度為根號2。因此將所有距離除以根號2,即可得到介於0到1之間的標準化距離。

/// <summary>
/// Sums the Euclidean distance between same-index points of the two strokes,
/// each distance divided by the square root of 2.
/// </summary>
/// <param name="points1">First stroke; its point count decides how many indices are compared.</param>
/// <param name="points2">Second stroke; must have at least as many points as the first.</param>
/// <returns>The accumulated scaled distance.</returns>
private float CalcPositionDistance(List<Vector2> points1, List<Vector2> points2)
{
    float diagonal = Mathf.Sqrt(2);
    int count = points1.Count;

    float total = 0;
    for (int k = 0; k < count; k++)
    {
        total += Vector2.Distance(points1[k], points2[k]) / diagonal;
    }

    return total;
}

5. 識別過程

在手寫識別部分,使用提取的特徵來進行手寫的比較,以確定兩個手寫軌跡是否相似。通過計算每對手寫之間的特徵距離,然後使用一個綜合得分來評估它們的相似性。

5.1 計算特徵距離:

使用曲線特徵、角度特徵和位置特徵來計算特徵距離。對於每對手寫軌跡,計算這些特徵的差異,並將它們結合成一個綜合得分。

/// <summary>
/// Scores two point lists against each other, choosing the circular
/// comparison when the second list is a closed line and the linear
/// comparison otherwise.
/// </summary>
/// <param name="points1">First point list.</param>
/// <param name="points2">Second point list.</param>
/// <param name="points2IsClosed">Whether <paramref name="points2"/> is a closed line.</param>
/// <returns>The score produced by the selected comparison.</returns>
private Score CalcListScore(List<Vector2> points1, List<Vector2> points2, bool points2IsClosed)
{
    return points2IsClosed
        ? CalcCircularListScore(points1, points2)
        : CalcLinearListScore(points1, points2);
}
 
/// <summary>
/// Builds a <see cref="Score"/> from the position, curvature and angle
/// distances between two point lists, compared index by index.
/// </summary>
/// <param name="points1">First point list.</param>
/// <param name="points2">Second point list.</param>
/// <returns>A score holding the three feature distances.</returns>
private Score CalcLinearListScore(List<Vector2> points1, List<Vector2> points2)
{
    // Initializer members are evaluated top to bottom: position, curvature, angle.
    return new Score
    {
        positionDistance = CalcPositionDistance(points1, points2),
        curvatureDistance = CalcCurvatureDistance(points1, points2),
        angleDistance = CalcAngleDistance(points1, points2)
    };
}

 
//曲線特徵
/// <summary>
/// Sums, over the indices of <paramref name="points1"/>, the absolute
/// difference between the curvature values of the two strokes, each
/// difference divided by 360.
/// </summary>
/// <param name="points1">First stroke; its point count decides how many indices are compared.</param>
/// <param name="points2">Second stroke.</param>
/// <returns>The accumulated curvature difference.</returns>
private float CalcCurvatureDistance(List<Vector2> points1, List<Vector2> points2)
{

    int n = points1.Count;

    var curv1 = CalcCurvature(points1);
    var curv2 = CalcCurvature(points2);

    float sumCurvDistance = 0;

    for (int i = 0; i < n; i++)
    {
        // Each term is |difference in degrees| / 360.
        float dif = Mathf.Abs(curv1[i] - curv2[i]) / 360f;
        sumCurvDistance += dif;
    }

    return sumCurvDistance;
}

5.2 評估手寫相似性:
根據計算得到的特徵距離,計算手寫的綜合得分。使用一組權重來結合不同特徵的距離,並得到最終的相似性得分。 
每個特徵的分數都乘以一個權重(即數字4、1和1),然後這些部分分數被加總起來。最後,將這個總分除以6,以確保最終的分數在0到1之間。

/// <summary>
/// Holds the three feature distances between two gestures and derives a
/// combined similarity value from them. Comparison operators order scores by
/// that combined value.
/// </summary>
public struct Score
{
    /// <summary>Accumulated position distance.</summary>
    public float positionDistance;

    /// <summary>Accumulated curvature distance.</summary>
    public float curvatureDistance;

    /// <summary>Accumulated angle distance.</summary>
    public float angleDistance;

    /// <summary>
    /// Combined similarity in [0, 1]. Each distance is mapped to
    /// clamp01(1 - distance / 50); the three results are then averaged with
    /// weights 4 (position), 1 (curvature) and 1 (angle).
    /// </summary>
    public float score
    {
        get
        {
            float position = ToPartialScore(positionDistance);
            float curvature = ToPartialScore(curvatureDistance);
            float angle = ToPartialScore(angleDistance);

            float weightedSum = 4 * position + 1 * curvature + 1 * angle;
            return Mathf.Clamp01(weightedSum / 6);
        }
    }

    /// <summary>Sets all three distances to <see cref="float.MaxValue"/>.</summary>
    public void InitMax()
    {
        positionDistance = float.MaxValue;
        curvatureDistance = float.MaxValue;
        angleDistance = float.MaxValue;
    }

    /// <summary>A score whose distances are all at their maximum, so its combined value is 0.</summary>
    public static Score MaxDistance
    {
        get
        {
            return new Score
            {
                positionDistance = float.MaxValue,
                curvatureDistance = float.MaxValue,
                angleDistance = float.MaxValue
            };
        }
    }

    public static bool operator >(Score s1, Score s2)
    {
        float left = s1.score;
        float right = s2.score;
        return left > right;
    }

    public static bool operator <(Score s1, Score s2)
    {
        float left = s1.score;
        float right = s2.score;
        return left < right;
    }

    public static bool operator >=(Score s1, Score s2)
    {
        float left = s1.score;
        float right = s2.score;
        return left >= right;
    }

    public static bool operator <=(Score s1, Score s2)
    {
        float left = s1.score;
        float right = s2.score;
        return left <= right;
    }

    // Maps a raw distance to a partial score: 1 at distance 0, 0 at distance 50 or more.
    private static float ToPartialScore(float distance)
    {
        return Mathf.Clamp01(1f - distance / 50);
    }
}

 

5.3 手寫識別過程
最終,通過比較不同手勢的得分來識別手勢。可分出最多4個執行緒下去做手寫軌跡比對,將待識別的手寫與預定義的手勢模式進行比較,並選擇得分最高的模式作為識別結果。 

/// <summary>
/// Recognizes a drawn gesture: normalizes it, searches the known patterns for
/// the best match, and records how long the whole operation took.
/// </summary>
/// <param name="data">The drawn gesture.</param>
/// <param name="normalizeScale">
/// When true the gesture (and each pattern) is rescaled to its own bounding
/// square before comparison.
/// </param>
/// <returns>
/// The result returned by the pattern search, with <c>recognitionTime</c>
/// set to the elapsed time in seconds.
/// </returns>
public RecognitionResult Recognize(GestureData data, bool normalizeScale = true)
{

    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    var normData = NormalizeData(data, normalizeScale);

    // The search method is declared as FindPattern; C# identifiers are
    // case-sensitive, so the call must use the same casing.
    var found = FindPattern(normData, normalizeScale);

    timer.Stop();

    // Milliseconds -> seconds.
    found.recognitionTime = (float)(timer.ElapsedMilliseconds / 1000.0);

    return found;
}

//將手寫數據的尺寸進行歸一化,以便不同尺寸的手寫數據可以進行比較
/// <summary>
/// Runs the normalization pipeline on a gesture: closed-line handling, then
/// (optionally) scale normalization, then resampling of every stroke with
/// the <c>Detail</c> setting.
/// </summary>
/// <param name="data">Gesture to normalize; it is not modified.</param>
/// <param name="normalizeScale">Whether to include the scale-normalization step.</param>
/// <returns>The normalized gesture.</returns>
private GestureData NormalizeData(GestureData data, bool normalizeScale)
{
    GestureData closed = NormalizeClosedLines(data);
    GestureData scaled = normalizeScale ? NormalizeScale(closed) : closed;
    return NormalizeDistribution(scaled, Detail);
}

//對閉合的手寫軌跡線進行歸一化處理,以確保軌跡的起點和終點連接起來,並且起點不會有明顯偏移
/// <summary>
/// Copies a gesture and, for every closed stroke, appends one extra point
/// located 99% of the way from the stroke's last point back to its first
/// point, so the stroke ends almost exactly where it started.
/// </summary>
/// <param name="data">Gesture to process; it is not modified.</param>
/// <returns>A new gesture with copied point lists.</returns>
private GestureData NormalizeClosedLines(GestureData data)
{
    var normalized = new GestureData();

    for (int k = 0; k < data.lines.Count; k++)
    {
        GestureLine source = data.lines[k];
        List<Vector2> copiedPoints = source.points.ToList();

        if (source.closedLine)
        {
            Vector2 tail = copiedPoints.Last();
            Vector2 head = copiedPoints[0];
            copiedPoints.Add(Vector2.Lerp(tail, head, 0.99f));
        }

        normalized.lines.Add(new GestureLine
        {
            points = copiedPoints,
            closedLine = source.closedLine
        });
    }

    return normalized;
}

//對手寫數據中的點進行均勻分布處理,以確保軌跡點的密度是一致的
/// <summary>
/// Resamples every stroke of a gesture with the point-list overload of
/// NormalizeDistribution, keeping each stroke's closed flag.
/// </summary>
/// <param name="data">Gesture to resample; it is not modified.</param>
/// <param name="n">Number of intervals per stroke, forwarded to the point-list overload.</param>
/// <returns>A new gesture made of the resampled strokes.</returns>
private GestureData NormalizeDistribution(GestureData data, int n)
{
    List<GestureLine> resampled = data.lines
        .Select(stroke => new GestureLine
        {
            points = NormalizeDistribution(stroke.points, n),
            closedLine = stroke.closedLine
        })
        .ToList();

    return new GestureData { lines = resampled };
}
 
//對每一條手寫線的點進行均勻分布處理
/// <summary>
/// Resamples a path into <paramref name="n"/> + 1 points spaced evenly along
/// its length.
/// </summary>
/// <param name="path">Original points of the stroke.</param>
/// <param name="n">Number of equal intervals; the result has n + 1 points.</param>
/// <returns>The resampled points.</returns>
private List<Vector2> NormalizeDistribution(List<Vector2> path, int n)
{
    // Cumulative travelled distance at each original point.
    var cumulative = new List<float> { 0 };
    for (int k = 1; k < path.Count; k++)
    {
        float segment = Vector2.Distance(path[k - 1], path[k]);
        cumulative.Add(cumulative[k - 1] + segment);
    }

    float totalDist = cumulative.Last();

    // Same positions as fractions of the total length. When the total length
    // is 0 the division yields NaN, and the lookup falls back to an end point.
    var fractions = new List<float>();
    foreach (float travelled in cumulative)
    {
        fractions.Add(travelled / totalDist);
    }

    var resampled = new List<Vector2>();
    for (int sample = 0; sample <= n; sample++)
    {
        float t = (float)sample / n;
        Vector2 point = FindByNormalized(path, fractions, t);
        resampled.Add(point);
    }

    return resampled;
}

//利用插值找到對應的點
/// <summary>
/// Finds the point at normalized position <paramref name="t"/> on a path by
/// locating the first segment whose normalized range contains t and
/// interpolating linearly inside it.
/// </summary>
/// <param name="vs">Points of the path.</param>
/// <param name="ts">Normalized position of each point in <paramref name="vs"/>.</param>
/// <param name="t">Normalized position to look up.</param>
/// <returns>
/// The interpolated point, or, when no segment contains t, the last point if
/// t is greater than 0.5 and the first point otherwise.
/// </returns>
private Vector2 FindByNormalized(List<Vector2> vs, List<float> ts, float t)
{
    int segmentCount = ts.Count - 1;

    for (int seg = 0; seg < segmentCount; seg++)
    {
        float lower = ts[seg];
        float upper = ts[seg + 1];

        // Written as a positive test on purpose: it is false whenever any
        // operand is NaN, so such segments are skipped.
        bool insideSegment = lower <= t && t <= upper;
        if (!insideSegment)
        {
            continue;
        }

        Vector2 segmentStart = vs[seg];
        Vector2 segmentEnd = vs[seg + 1];
        float local = Mathf.InverseLerp(lower, upper, t);
        return Vector2.Lerp(segmentStart, segmentEnd, local);
    }

    if (t > 0.5f)
    {
        return vs[vs.Count - 1];
    }

    return vs[0];
}

//找到最相似的手寫模板
/// <summary>
/// Searches all known patterns for the one that best matches the query
/// gesture. The pattern list is divided into contiguous index ranges, each
/// searched on its own thread; the best per-range result wins.
/// </summary>
/// <param name="queryData">The (already normalized) drawn gesture.</param>
/// <param name="normalizeScale">Forwarded to the per-range search.</param>
/// <returns>
/// The best matching pattern and its score. When nothing scores above 0 the
/// gesture is the default (null) and the score is <c>Score.MaxDistance</c>.
/// </returns>
private RecognitionResult FindPattern(GestureData queryData, bool normalizeScale)
{
    var bestGesture = default(GesturePattern);
    var bestScore = Score.MaxDistance;

    // Every ordering of the query's strokes, plus a one-element list holding
    // only the original ordering.
    var indexes = Enumerable.Range(0, queryData.lines.Count).ToList();
    List<List<int>> permutIndexes = GenPermutations(indexes);

    // The helper is declared as makePermutation; identifiers are
    // case-sensitive, so the call must use the same casing.
    var permutations = permutIndexes.Select(e => makePermutation(e, queryData)).ToList();
    var singlePermutation = permutations.GetRange(0, 1);

    int n_threads = Mathf.Min(this.numberOfThreads, patterns.Count);

    var threads = new List<Thread>();

    // Private gate shared by the worker closures below. Locking on a local
    // object (rather than `this`) means no outside code can take the same lock.
    var gate = new object();

    for (int threadIndex = 0; threadIndex < n_threads; threadIndex++)
    {
        // Inclusive range of pattern indices handled by this thread.
        int beginIndex = threadIndex * patterns.Count / n_threads;
        int endIndex = (threadIndex + 1) * patterns.Count / n_threads - 1;

        threads.Add(new Thread(() =>
        {
            var result = SearchThroughPatterns(beginIndex, endIndex, queryData, normalizeScale, permutations, singlePermutation);

            lock (gate)
            {
                if (result.score > bestScore)
                {
                    bestScore = result.score;
                    bestGesture = result.gesture;
                }
            }
        }));
    }

    for (int i = 0; i < threads.Count; i++)
    {
        threads[i].Start();
    }

    // Wait for every worker before reading the shared best result.
    for (int i = 0; i < threads.Count; i++)
    {
        threads[i].Join();
    }

    return new RecognitionResult() { gesture = bestGesture, score = bestScore };
}
 
//依照indexes指定的順序重新排列線條,取得新的GestureData
/// <summary>
/// Builds a gesture whose strokes are the strokes of <paramref name="data"/>
/// picked in the order given by <paramref name="indexes"/>.
/// </summary>
/// <param name="indexes">Stroke indices into <c>data.lines</c>, in the desired order.</param>
/// <param name="data">Source gesture; its stroke objects are reused, not copied.</param>
/// <returns>A new gesture referencing the reordered strokes.</returns>
private GestureData makePermutation(List<int> indexes, GestureData data)
{
    List<GestureLine> reordered = indexes
        .Select(index => data.lines[index])
        .ToList();

    return new GestureData { lines = reordered };
}
 
//以遞迴方式產生所有線條順序的排列
/// <summary>
/// Generates every permutation of the elements of <paramref name="list"/>
/// from position <paramref name="low"/> onward, keeping the elements before
/// that position fixed.
/// </summary>
/// <param name="list">
/// Working list. It is swapped in place during generation and restored to
/// its original order before the method returns.
/// </param>
/// <param name="low">First position that is allowed to vary.</param>
/// <returns>
/// A list of independent copies, one per permutation. An empty or
/// single-element tail yields exactly one entry.
/// </returns>
private static List<List<int>> GenPermutations(List<int> list, int low = 0)
{
    var permutations = new List<List<int>>();

    // Nothing left to vary: the current arrangement is the only result.
    if (low + 1 >= list.Count)
    {
        permutations.Add(new List<int>(list));
        return permutations;
    }

    // Put each candidate at position `low` in turn. The first pass
    // (pick == low) swaps an element with itself, i.e. leaves the list as is.
    for (int pick = low; pick < list.Count; pick++)
    {
        int held = list[low];
        list[low] = list[pick];
        list[pick] = held;

        permutations.AddRange(GenPermutations(list, low + 1));

        // Undo the swap so the next candidate starts from the same arrangement.
        held = list[low];
        list[low] = list[pick];
        list[pick] = held;
    }

    return permutations;
}

//比對指定範圍內的手寫模板資料,找出最相似的模板
/// <summary>
/// Compares the query gesture against the patterns in an inclusive index
/// range and returns the best match found in that range.
/// </summary>
/// <param name="beginIndex">First pattern index to examine (inclusive).</param>
/// <param name="endIndex">Last pattern index to examine (inclusive).</param>
/// <param name="queryData">The drawn gesture; used here only for its stroke count.</param>
/// <param name="normalizeScale">Whether each pattern is scale-normalized before comparison.</param>
/// <param name="permutations">The query gesture in every stroke ordering.</param>
/// <param name="singlePermutation">A list holding only the query gesture in its original stroke ordering.</param>
/// <returns>
/// The best matching pattern of the range and its score. When nothing scores
/// above 0 the gesture is the default (null) and the score is
/// <c>Score.MaxDistance</c>.
/// </returns>
private RecognitionResult SearchThroughPatterns(int beginIndex, int endIndex, GestureData queryData, bool normalizeScale, List<GestureData> permutations, List<GestureData> singlePermutation)
{
    var bestGesture = default(GesturePattern);
    var bestScore = Score.MaxDistance;

    for (int i = beginIndex; i <= endIndex; i++)
    {
        var gestureAsset = patterns[i];
        var assetData = NormalizeData(gestureAsset.gesture, normalizeScale);

        if (assetData.lines.Count != queryData.lines.Count)
        {
            // Skip patterns whose stroke count differs from the query's.
            continue;
        }

        // A pattern that requires the drawn stroke order is compared against
        // the original ordering only; otherwise every ordering is tried.
        var permutationsToLook = gestureAsset.useLinesOrder ? singlePermutation : permutations;

        foreach (var data in permutationsToLook)
        {
            var permutScore = CalcScore(data, assetData, gestureAsset.useLinesDirections);

            if (permutScore > bestScore)
            {
                bestScore = permutScore;
                bestGesture = gestureAsset;
            }
        }
    }

    // The caller reads both members from the returned result.
    return new RecognitionResult() { gesture = bestGesture, score = bestScore };
}

 

6. 實際應用
最後,我們將手勢識別應用於實際場景中。我們創建DrawDetector類別來監聽用戶的手勢輸入並進行識別。 

/// <summary>
/// UI component that records pointer drags as gesture strokes, draws them with
/// UILineRenderer instances, and asks a <see cref="Recognizer"/> to identify
/// the gesture each time a drag ends. Results are reported through
/// <see cref="OnRecognize"/>.
/// </summary>
public class DrawDetector : MonoBehaviour, IBeginDragHandler, IDragHandler, IEndDragHandler, IPointerClickHandler {
   
   // Recognizer used to identify the drawn strokes.
   public Recognizer recognizer;

   // Template line renderer; additional renderers are instantiated from it as needed.
   public UILineRenderer line;
   // One renderer per displayed stroke; created in Start with `line` as its first entry.
   private List<UILineRenderer> lines;

   // Minimum combined score a result needs in order to be accepted.
   [Range(0f,1f)]
   public float scoreToAccept = 0.8f;

   // Smallest number of trailing strokes that recognition will try.
   [Range(1,10)]
   public int minLines = 1;
   // Write-only setter that clamps the value to 1..10.
   public int MinLines { set { minLines = Mathf.Clamp (value, 1, 10); } }

   // Once this many strokes are stored, starting a new stroke triggers removeStrategy.
   [Range(1,10)]
   public int maxLines = 2;
   // Write-only setter that clamps the value to 1..10.
   public int MaxLines { set { maxLines = Mathf.Clamp (value, 1, 10); } }

   // What to discard when a new stroke starts and maxLines has been reached:
   // RemoveOld drops the oldest stroke, ClearAll drops every stroke.
   public enum RemoveStrategy { RemoveOld, ClearAll }
   public RemoveStrategy removeStrategy;

   // When true, a successful recognition keeps only the strokes that were recognized.
   public bool clearNotRecognizedLines;

   // When true, points are normalized against this object's rect before recognition
   // and the recognizer is told not to normalize scale itself.
   public bool fixedArea = false;

   // Strokes recorded so far.
   GestureData data = new GestureData();

   // Event type carrying a recognition result.
   [System.Serializable]
   public class ResultEvent : UnityEvent<RecognitionResult> {}
   // Invoked after each recognition attempt (see OnEndDragCoroutine).
   public ResultEvent OnRecognize;

   // Cached RectTransform of this object, assigned in Start.
   RectTransform rectTransform;


   // Configures the template renderer, creates the renderer list and caches the RectTransform.
   void Start(){
      line.relativeSize = true;
      line.LineList = false;
      lines = new List<UILineRenderer> (){ line };
      rectTransform = transform as RectTransform;
      UpdateLines ();
   }

   // Keeps maxLines from dropping below minLines.
   void OnValidate(){
      maxLines = Mathf.Max (minLines, maxLines);
   }

   // Synchronizes the line renderers with the recorded strokes.
   public void UpdateLines(){
      // Make sure there is a renderer for every recorded stroke.
      while (lines.Count < data.lines.Count) {
         var newLine = Instantiate (line, line.transform.parent);
         lines.Add (newLine);
      }
      // Clear every renderer first so renderers without a stroke show nothing.
      for (int i = 0; i < lines.Count; i++) {
         lines [i].Points = new Vector2[]{ };
         lines [i].SetAllDirty ();
      }
      // Assign each stroke's points, converted to renderer coordinates.
      int n = Mathf.Min (lines.Count, data.lines.Count);
      for (int i = 0; i < n; i++) {
         lines [i].Points = data.lines [i].points.Select (p => RealToLine (p)).ToArray ();
         lines [i].SetAllDirty ();
      }
   }

   // Converts a recorded position into this object's local space and then
   // normalizes it against this object's rect.
   Vector2 RealToLine(Vector2 position){
      var local = rectTransform.InverseTransformPoint (position);
      var normalized = Rect.PointToNormalized (rectTransform.rect, local);
      return normalized;
   }

   // Currently returns the position unchanged.
   Vector2 FixedPosition(Vector2 position){
      return position; 
   }

   // Removes every recorded stroke and refreshes the renderers.
   public void ClearLines(){
      data.lines.Clear ();
      UpdateLines ();
   }

   // Intentionally empty.
   public void OnPointerClick (PointerEventData eventData) {

   }

   // Starts a new stroke, first applying removeStrategy if maxLines was reached.
   public void OnBeginDrag (PointerEventData eventData) {

      if (data.lines.Count >= maxLines) {
         switch (removeStrategy) {
         case RemoveStrategy.RemoveOld:
            data.lines.RemoveAt (0);
            break;
         case RemoveStrategy.ClearAll:
            data.lines.Clear ();
            break;
         }
      }

      data.lines.Add (new GestureLine ());

      // The stroke was just created, so its point list is empty and the first point is always added.
      var fixedPos = FixedPosition (eventData.position);
      if (data.LastLine.points.Count == 0 || data.LastLine.points.Last () != fixedPos) {
         data.LastLine.points.Add (fixedPos);
         UpdateLines ();
      }
   }

   // Appends the pointer position to the current stroke unless it repeats the last point.
   public void OnDrag (PointerEventData eventData) {
      var fixedPos = FixedPosition (eventData.position);
      if (data.LastLine.points.Count == 0 || data.LastLine.points.Last () != fixedPos) {
         data.LastLine.points.Add (fixedPos);
         UpdateLines ();
      }
   }

   // Hands the end of the drag to a coroutine so recognition can span several frames.
   public void OnEndDrag (PointerEventData eventData)
   {
      StartCoroutine (OnEndDragCoroutine (eventData));
   }

   // Finishes the current stroke, then tries to recognize the last `size` strokes,
   // starting with all recorded strokes and shrinking until a result is accepted
   // or `size` falls below minLines.
   IEnumerator OnEndDragCoroutine(PointerEventData eventData){

      data.LastLine.points.Add (FixedPosition(eventData.position));
      UpdateLines ();

      for (int size = data.lines.Count; size >= 1 && size >= minLines; size--) {
         //last [size] lines
         var sizedData = new GestureData () {
            lines = data.lines.GetRange (data.lines.Count - size, size)
         };

         var sizedNormalizedData = sizedData;

         // In fixed-area mode, express every point relative to this object's rect.
         if (fixedArea) {
            var rect = this.rectTransform.rect;
            sizedNormalizedData = new GestureData (){
               lines = sizedData.lines.Select( line => new GestureLine(){
                  closedLine = line.closedLine,
                  points = line.points.Select( p => Rect.PointToNormalized(rect, this.rectTransform.InverseTransformPoint(p) ) ).ToList()
               } ).ToList()
            };
         }

         RecognitionResult result = null;

         // Recognize the gesture formed by the last `size` strokes.

         // Recognition runs on a separate thread; the coroutine yields once per
         // frame until that thread has finished.
         var thread = new System.Threading.Thread (()=>{
            result = recognizer.Recognize (sizedNormalizedData, normalizeScale: !fixedArea);
         });
         thread.Start ();
         while (thread.IsAlive) {
            yield return null;
         }

         // NOTE(review): `result` stays null if Recognize throws on the worker
         // thread, in which case the next line would throw — confirm whether a guard is wanted.
         if (result.gesture != null && result.score.score >= scoreToAccept) {
            OnRecognize.Invoke (result);
            if (clearNotRecognizedLines) {
               // Keep only the strokes that produced the accepted result.
               data = sizedData;
               UpdateLines ();
            }
            break;
         } else {
            // Reported once for every size that fails, so listeners may receive
            // several empty results for a single drag.
            OnRecognize.Invoke (RecognitionResult.Empty);
         }
      }

      yield return null;
   }

} 

7. 實作範例畫面

 

ezgif.com-crop

arrow
arrow

    Kouhei 發表在 痞客邦 留言(0) 人氣()