aboutsummaryrefslogtreecommitdiff
path: root/DotnetPgn/Tokenizer.cs
diff options
context:
space:
mode:
authorGravatar Daniel Smith <rdnlsmith@gmail.com> 2020-11-28 21:24:40 -0500
committerGravatar Daniel Smith <rdnlsmith@gmail.com> 2020-11-28 21:24:40 -0500
commit df8253dbf6cb09093018333f99dd9c19ba0ff02b (patch)
tree 455776153a088e3bf164bc390813043a7cbe8520 /DotnetPgn/Tokenizer.cs
Initial commit
`Tokenizer` can _just barely_ parse a basic, well-formed move list. Initially, I wanted to provide the movetext as a `Stream` rather than a string, the idea being that it could be processed as it was being read from a file without having to read the entire file into memory first. I had difficulties with the stream being unreadable in `Tokenizer.ParseMoves()`, so I switched to a string in order to get the actual parsing logic down first. Because of the `yield return` strategy, the debug console output includes all of the expected halfmoves multiple times in various orders. After running a test, generally the full, in-order list seems to exist at the bottom of the output.
Diffstat (limited to 'DotnetPgn/Tokenizer.cs')
-rw-r--r-- DotnetPgn/Tokenizer.cs 75
1 files changed, 75 insertions, 0 deletions
diff --git a/DotnetPgn/Tokenizer.cs b/DotnetPgn/Tokenizer.cs
new file mode 100644
index 0000000..9f779cb
--- /dev/null
+++ b/DotnetPgn/Tokenizer.cs
@@ -0,0 +1,75 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+using DotnetPgn.Models;
+
+namespace DotnetPgn
+{
+    /// <summary>
+    /// Splits movetext into whitespace-delimited tokens and turns every token that matches the
+    /// halfmove pattern into a <see cref="HalfMove"/>.
+    /// </summary>
+    public static class Tokenizer
+    {
+        // Capture groups, in order:
+        //   1: optional piece letter (K, Q, R, B, N or P)
+        //   2: optional source file and/or rank
+        //   3: optional 'x'
+        //   4: target square (file a-h followed by rank 1-8)
+        //   5: optional '+' or '#' suffix
+        // The pattern is not anchored, so a halfmove embedded in a longer token (e.g. "1.e4") still matches.
+        private static readonly Regex s_halfMoveRegex =
+            new(@"([KQRBNP]?)([a-h]?[1-8]?)(x?)([a-h][1-8])([+#]?)");
+
+        /// <summary>
+        /// Lazily parses <paramref name="moveText"/> into halfmoves.
+        /// </summary>
+        /// <param name="moveText">Movetext whose tokens are separated by spaces, tabs, or line breaks.</param>
+        /// <returns>
+        /// One <see cref="HalfMove"/> per recognized token, in input order. Tokens that do not match the
+        /// halfmove pattern (move numbers such as "1.", for example) are reported on the console and skipped.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="moveText"/> is null.</exception>
+        public static IEnumerable<HalfMove> ParseMoves(string moveText)
+        {
+            // Validate here rather than inside the iterator so that a null argument fails at the call
+            // site instead of at the first MoveNext().
+            if (moveText is null)
+            {
+                throw new ArgumentNullException(nameof(moveText));
+            }
+
+            return ParseMovesIterator(moveText);
+        }
+
+        // Iterator behind ParseMoves; walks the text once and yields a halfmove for each recognized token.
+        private static IEnumerable<HalfMove> ParseMovesIterator(string moveText)
+        {
+            StringBuilder currToken = new();
+
+            // Move numbers are counted here rather than read from the text: the counter advances
+            // after every Black halfmove.
+            int moveNumber = 1;
+            Player currPlayer = Player.White;
+
+            // Iterate one position past the last character so that end-of-input acts as a delimiter.
+            // Without this, a final token that is not followed by whitespace would never be emitted.
+            for (int i = 0; i <= moveText.Length; i++)
+            {
+                bool atEnd = i == moveText.Length;
+
+                if (!atEnd && moveText[i] is not (' ' or '\t' or '\n' or '\r'))
+                {
+                    currToken.Append(moveText[i]);
+                    continue;
+                }
+
+                // Runs of whitespace (including "\r\n") and trailing whitespace produce empty tokens;
+                // there is nothing to parse or report for those.
+                if (currToken.Length == 0)
+                {
+                    continue;
+                }
+
+                // We should have either a move number or a complete halfmove.
+                string token = currToken.ToString();
+                currToken.Clear();
+                Match match = s_halfMoveRegex.Match(token);
+
+                if (!match.Success)
+                {
+                    Console.WriteLine($"Unrecognized token: `{token}`");
+                    Console.WriteLine();
+                    continue;
+                }
+
+                Console.WriteLine($"Recognized token: {token}");
+                Console.WriteLine($"Matching groups: ({match.Groups[1].Value})" +
+                    $"({match.Groups[2].Value})({match.Groups[3].Value})({match.Groups[4].Value})" +
+                    $"({match.Groups[5].Value})");
+
+                // Group 4 is always exactly two characters: file letter, then rank digit.
+                string target = match.Groups[4].Value;
+
+                // NOTE(review): group 2 (source file/rank) is captured but SourceSquare is never
+                // assigned, so it keeps its default value -- confirm whether that is intended.
+                var move = new HalfMove
+                {
+                    MoveNumber = moveNumber,
+                    Player = currPlayer,
+                    Piece = PieceParser.ParsePiece(match.Groups[1].Value),
+                    TargetSquare = new Square(target[0], Convert.ToInt32(Char.GetNumericValue(target[1]))),
+                };
+
+                Console.WriteLine($"Halfmove: {move.MoveNumber}. {move.Player} {move.Piece} from {move.SourceSquare} to {move.TargetSquare}");
+                Console.WriteLine();
+
+                if (currPlayer == Player.White)
+                {
+                    currPlayer = Player.Black;
+                }
+                else
+                {
+                    currPlayer = Player.White;
+                    moveNumber++;
+                }
+
+                yield return move;
+            }
+        }
+    }
+} \ No newline at end of file