// Provenance: the original text of this file was taken from the patch below
// (cgit v1.2.3 view, limited to 'DotnetPgn/Tokenizer.cs'; the patch ended with
// "\ No newline at end of file"). The header is kept as-is for reference.
//
//   From df8253dbf6cb09093018333f99dd9c19ba0ff02b Mon Sep 17 00:00:00 2001
//   From: Daniel Smith
//   Date: Sat, 28 Nov 2020 21:24:40 -0500
//   Subject: Initial commit
//
//   `Tokenizer` can _just barely_ parse a basic, well-formed move list.
//
//   Initially, I wanted to provide the movetext as a `Stream` rather than a
//   string, the idea being that it could be processed as it was being read
//   from a file without having to read the entire file into memory first. I
//   had difficulties with the stream being unreadable in
//   `Tokenizer.ParseMoves()`, so I switched to a string in order to get the
//   actual parsing logic down first.
//
//   Because of the `yield return` strategy, the debug console output includes
//   all of the expected halfmoves multiple times in various orders. After
//   running a test, generally the full, in-order list seems to exist at the
//   bottom of the output.
//   ---
//    DotnetPgn/Tokenizer.cs | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
//    1 file changed, 75 insertions(+)
//    create mode 100644 DotnetPgn/Tokenizer.cs
//
//   diff --git a/DotnetPgn/Tokenizer.cs b/DotnetPgn/Tokenizer.cs
//   new file mode 100644
//   index 0000000..9f779cb
//   --- /dev/null
//   +++ b/DotnetPgn/Tokenizer.cs
//   @@ -0,0 +1,75 @@

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using DotnetPgn.Models;

namespace DotnetPgn
{
    /// <summary>
    /// Splits movetext into whitespace-delimited tokens and converts every token that
    /// contains a halfmove into a <see cref="HalfMove"/>.
    /// </summary>
    public static class Tokenizer
    {
        // Capture groups:
        //   1 = optional piece letter (K, Q, R, B, N or P)
        //   2 = optional a-h letter and/or 1-8 digit before the capture marker
        //       (presumably a source-square hint; it is currently only printed, never stored)
        //   3 = optional capture marker "x"
        //   4 = target square (a-h followed by 1-8)
        //   5 = optional "+" or "#" suffix
        // The pattern is not anchored, so Match() succeeds on any token that merely
        // contains a halfmove (for example a token with a leading move number).
        private static readonly Regex s_halfMoveRegex =
            new(@"([KQRBNP]?)([a-h]?[1-8]?)(x?)([a-h][1-8])([+#]?)");

        /// <summary>
        /// Lazily parses the halfmoves found in <paramref name="moveText"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Tokens are separated by space, tab, line feed or carriage return. A token that
        /// does not match the halfmove pattern (such as a move number) is reported on the
        /// console and skipped. Numbering starts at move 1 with White to play; the move
        /// number advances after every Black halfmove.
        /// </para>
        /// <para>
        /// The returned sequence is an iterator: nothing is parsed until it is enumerated,
        /// and every enumeration parses the text again and writes the diagnostic console
        /// output again. Materialize the result (e.g. <c>ToList()</c>) to parse only once.
        /// </para>
        /// </remarks>
        /// <param name="moveText">The movetext to tokenize.</param>
        /// <returns>The recognized halfmoves, in the order they appear in the text.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="moveText"/> is <see langword="null"/>.
        /// </exception>
        public static IEnumerable<HalfMove> ParseMoves(string moveText)
        {
            // Validate here rather than in the iterator so the exception is thrown at the
            // call site instead of on the first MoveNext().
            if (moveText is null)
            {
                throw new ArgumentNullException(nameof(moveText));
            }

            return ParseMovesIterator(moveText);
        }

        // Iterator body of ParseMoves: accumulates characters into a token and emits a
        // halfmove for each completed token that matches the halfmove pattern.
        private static IEnumerable<HalfMove> ParseMovesIterator(string moveText)
        {
            StringBuilder currToken = new();
            int moveNumber = 1;
            Player currPlayer = Player.White;

            // The loop deliberately runs one step past the last character: that extra step
            // acts as a terminating separator so the final token is flushed even when the
            // text does not end in whitespace.
            for (int i = 0; i <= moveText.Length; i++)
            {
                bool atEnd = i == moveText.Length;

                if (!atEnd && !IsSeparator(moveText[i]))
                {
                    currToken.Append(moveText[i]);
                    continue;
                }

                // Leading, trailing or repeated separators produce no token at all.
                if (currToken.Length == 0)
                {
                    continue;
                }

                // We should have either a move number or a complete halfmove.
                string token = currToken.ToString();
                currToken.Clear();
                Match match = s_halfMoveRegex.Match(token);

                if (!match.Success)
                {
                    Console.WriteLine($"Unrecognized token: `{token}`");
                    Console.WriteLine();
                    continue;
                }

                HalfMove move = CreateHalfMove(token, match, moveNumber, currPlayer);

                if (currPlayer == Player.White)
                {
                    currPlayer = Player.Black;
                }
                else
                {
                    currPlayer = Player.White;
                    moveNumber++;
                }

                yield return move;
            }
        }

        // Returns true for the characters that terminate a token.
        private static bool IsSeparator(char c) => c is ' ' or '\t' or '\n' or '\r';

        // Builds the HalfMove for a successfully matched token and writes the debug trace.
        private static HalfMove CreateHalfMove(string token, Match match, int moveNumber, Player player)
        {
            Console.WriteLine($"Recognized token: {token}");
            Console.WriteLine($"Matching groups: ({match.Groups[1].Value})"
                + $"({match.Groups[2].Value})({match.Groups[3].Value})({match.Groups[4].Value})"
                + $"({match.Groups[5].Value})");

            // Group 4 always has the form [a-h][1-8], so the second character is an ASCII
            // digit and subtracting '0' yields its numeric value.
            string target = match.Groups[4].Value;

            var move = new HalfMove
            {
                MoveNumber = moveNumber,
                Player = player,
                Piece = PieceParser.ParsePiece(match.Groups[1].Value),
                TargetSquare = new Square(target[0], target[1] - '0'),
            };

            Console.WriteLine($"Halfmove: {move.MoveNumber}. {move.Player} {move.Piece} from {move.SourceSquare} to {move.TargetSquare}");
            Console.WriteLine();

            return move;
        }
    }
}