From df8253dbf6cb09093018333f99dd9c19ba0ff02b Mon Sep 17 00:00:00 2001
From: Daniel Smith
Date: Sat, 28 Nov 2020 21:24:40 -0500
Subject: Initial commit

`Tokenizer` can _just barely_ parse a basic, well-formed move list.

Initially, I wanted to provide the movetext as a `Stream` rather than a
string, the idea being that it could be processed as it was being read
from a file without having to read the entire file into memory first. I
had difficulties with the stream being unreadable in
`Tokenizer.ParseMoves()`, so I switched to a string in order to get the
actual parsing logic down first.

Because of the `yield return` strategy, the debug console output
includes all of the expected halfmoves multiple times in various orders.
After running a test, generally the full, in-order list seems to exist
at the bottom of the output.
---
 DotnetPgn/DotnetPgn.csproj   |  7 +++++
 DotnetPgn/Models/Enums.cs    | 18 +++++++++++
 DotnetPgn/Models/HalfMove.cs | 11 +++++++
 DotnetPgn/Models/Square.cs   | 22 +++++++++++++
 DotnetPgn/PieceParser.cs     | 22 +++++++++++++
 DotnetPgn/Tokenizer.cs       | 75 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 155 insertions(+)
 create mode 100644 DotnetPgn/DotnetPgn.csproj
 create mode 100644 DotnetPgn/Models/Enums.cs
 create mode 100644 DotnetPgn/Models/HalfMove.cs
 create mode 100644 DotnetPgn/Models/Square.cs
 create mode 100644 DotnetPgn/PieceParser.cs
 create mode 100644 DotnetPgn/Tokenizer.cs

(limited to 'DotnetPgn')

diff --git a/DotnetPgn/DotnetPgn.csproj b/DotnetPgn/DotnetPgn.csproj
new file mode 100644
index 0000000..563e6f9
--- /dev/null
+++ b/DotnetPgn/DotnetPgn.csproj
@@ -0,0 +1,7 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net5.0</TargetFramework>
+  </PropertyGroup>
+
+</Project>
diff --git a/DotnetPgn/Models/Enums.cs b/DotnetPgn/Models/Enums.cs
new file mode 100644
index 0000000..893f2fa
--- /dev/null
+++ b/DotnetPgn/Models/Enums.cs
@@ -0,0 +1,18 @@
+namespace DotnetPgn.Models
+{
+    public enum Piece
+    {
+        Pawn,
+        Knight,
+        Bishop,
+        Rook,
+        Queen,
+        King,
+    }
+
+    public enum Player
+    {
+        White,
+        Black,
+    }
+}
\ No newline at end of file
diff --git a/DotnetPgn/Models/HalfMove.cs b/DotnetPgn/Models/HalfMove.cs
new file mode 100644
index 0000000..b94d77d
--- /dev/null
+++ b/DotnetPgn/Models/HalfMove.cs
@@ -0,0 +1,11 @@
+namespace DotnetPgn.Models
+{
+    public record HalfMove
+    {
+        public int MoveNumber {get; init;}
+        public Player Player { get; init; }
+        public Piece Piece { get; init; }
+        public Square SourceSquare { get; init; }
+        public Square TargetSquare { get; init; }
+    }
+}
\ No newline at end of file
diff --git a/DotnetPgn/Models/Square.cs b/DotnetPgn/Models/Square.cs
new file mode 100644
index 0000000..a3910d3
--- /dev/null
+++ b/DotnetPgn/Models/Square.cs
@@ -0,0 +1,22 @@
+using System;
+
+namespace DotnetPgn.Models
+{
+    public record Square
+    {
+        public char Rank { get; }
+        public int File { get; }
+
+        public Square(char rank, int file)
+        {
+            if (rank < 'a' || rank > 'h')
+                throw new ArgumentException($"Invalid rank '{rank}'.");
+
+            if (file < 1 || file > 8)
+                throw new ArgumentException($"Invalid file '{file}'.");
+
+            Rank = rank;
+            File = file;
+        }
+    }
+}
\ No newline at end of file
diff --git a/DotnetPgn/PieceParser.cs b/DotnetPgn/PieceParser.cs
new file mode 100644
index 0000000..5ea9402
--- /dev/null
+++ b/DotnetPgn/PieceParser.cs
@@ -0,0 +1,22 @@
+using System;
+using DotnetPgn.Models;
+
+namespace DotnetPgn
+{
+    public static class PieceParser
+    {
+        public static Piece ParsePiece(string sanPiece)
+        {
+            return sanPiece switch
+            {
+                "P" or "p" or "" => Piece.Pawn,
+                "N" or "n" => Piece.Knight,
+                "B" or "b" => Piece.Bishop,
+                "R" or "r" => Piece.Rook,
+                "Q" or "q" => Piece.Queen,
+                "K" or "k" => Piece.King,
+                _ => throw new ArgumentException($"'{sanPiece} is not a valid piece.'"),
+            };
+        }
+    }
+}
diff --git a/DotnetPgn/Tokenizer.cs b/DotnetPgn/Tokenizer.cs
new file mode 100644
index 0000000..9f779cb
--- /dev/null
+++ b/DotnetPgn/Tokenizer.cs
@@ -0,0 +1,75 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+using DotnetPgn.Models;
+
+namespace DotnetPgn
+{
+    public static class Tokenizer
+    {
+        private static readonly Regex s_halfMoveRegex =
+            new(@"([KQRBNP]?)([a-h]?[1-8]?)(x?)([a-h][1-8])([+#]?)");
+
+        public static IEnumerable<HalfMove> ParseMoves(string moveText)
+        {
+            StringBuilder currToken = new();
+            char[] moveTextChars = moveText.ToCharArray();
+            int moveNumber = 1;
+            Player currPlayer = Player.White;
+
+            for (int i = 0; i < moveTextChars.Length; i++)
+            {
+                char nextChar = moveTextChars[i];
+
+                if (nextChar is ' ' or '\t' or '\n' or '\r')
+                {
+                    // We should have either a move number or a complete halfmove.
+                    string token = currToken.ToString();
+                    currToken.Clear();
+                    Match match = s_halfMoveRegex.Match(token);
+
+                    if (!match.Success)
+                    {
+                        Console.WriteLine($"Unrecognized token: `{token}`");
+                        Console.WriteLine();
+                        continue;
+                    }
+
+                    Console.WriteLine($"Recognized token: {token}");
+                    Console.WriteLine($"Matching groups: ({match.Groups[1].Value})" +
+                        $"({match.Groups[2].Value})({match.Groups[3].Value})({match.Groups[4].Value})" +
+                        $"({match.Groups[5].Value})");
+
+                    var move = new HalfMove
+                    {
+                        MoveNumber = moveNumber,
+                        Player = currPlayer,
+                        Piece = PieceParser.ParsePiece(match.Groups[1].Value),
+                        TargetSquare = new Square(match.Groups[4].Value[0], Convert.ToInt32(Char.GetNumericValue(match.Groups[4].Value[1]))),
+                    };
+
+                    Console.WriteLine($"Halfmove: {move.MoveNumber}. {move.Player} {move.Piece} from {move.SourceSquare} to {move.TargetSquare}");
+                    Console.WriteLine();
+
+                    if (currPlayer == Player.White)
+                    {
+                        currPlayer = Player.Black;
+                    }
+                    else
+                    {
+                        currPlayer = Player.White;
+                        moveNumber++;
+                    }
+
+                    yield return move;
+                }
+                else
+                {
+                    currToken.Append(nextChar);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
-- 
cgit v1.2.3