ゲームプログラミングのプログラミング例

対象としてはオセロを選ぶ，オセロのルールは以下のようになっている．オセロのルール

ゲームによらない探索の一般化をして，クラスを定義してみる．このようなことには，オブジェクト指向言語は向いている．

// 手とその評価値を表す抽象クラス
abstract class Move {
  // 評価値は doubleで表す
  double value;
  // 評価値の getter
  double getValue(){ return value; }
  // 評価値の setter
  void setValue(double value){ this.value=value; }
}

// 動かした盤面を元に戻せるように記憶
abstract class TrailStack{
}

// 盤面の状態を表す
abstract class State{
  // 次の可能手の MoveのVectorを返す
  abstract Vector nextMoves();
  // 手にしたがって盤面を変化させる．
  // TrailStackを返す
  abstract TrailStack doMove(Move move);
  // TrailStackに従って盤面を戻す
  abstract void undoMove(TrailStack trail);
  // 盤面の評価値
  abstract double eval();
}

// 次の手を決めるアルゴリズム
abstract class Search {
  // 次の手を返す
  abstract Move bestMove(State state);
}

このSearchのサブクラスとして，1手先まで読む探索プログラムを書いてみる．

// Depth1Search
// 可能手を全部生成して,その盤面の評価値で最良のものを選択する
//
class Depth1Search extends Search {
  // return null if there exists no moves
  Move bestMove(State state){
    // 可能な手を全部生成する
    Vector moves=state.nextMoves();
    int size=moves.size();
    // 最良の手が複数あるのでそれを管理する
    Vector bestMoves=new Vector();
    Move best=null;
    // 最良の手の値を負の無限大に設定しておく
    double bestVal=Double.NEGATIVE_INFINITY;
    for(int i=0;i< size;i++){
      Move move=(Move)moves.elementAt(i);
      // 1手進めてみる
      TrailStack trail=state.doMove(move);
      // 評価値を計算
      // 1手進んだ時(相手の番)の評価値なので符号を入れ替える．
      double moveVal= -state.eval(); 
      // 戻す
      state.undoMove(trail);
      move.setValue(moveVal);
      // 評価値がこれまでのbestを超えた時
      if(bestVal< moveVal){
	bestVal=moveVal;
	bestMoves=new Vector();
	bestMoves.addElement(move);
      }
      // 評価値がこれまでのbestと同じ時
      else if(bestVal==moveVal){
	bestMoves.addElement(move);
      }
    }
    // bestの中から乱数で選択
    int bestSize=bestMoves.size();
    int selected=(int)(Math.random()*bestSize);
    return (Move)bestMoves.elementAt(selected);
  }
}

MinMaxもほとんど一緒である．

//
// MinMax search
// 

class MinMaxSearch extends Search {
  // どの深さまで読むか?
  int maxLevel;
  // インスタンス生成時にレベルを設定
  MinMaxSearch(int maxLevel){
    this.maxLevel=maxLevel;
  }
  // レベルに従った評価値
  double eval(State state,int level){
    // 末端のレベルでは局面の評価値
    if(level==0) return state.eval();
    // そうでない時はベストの手の時の評価値
    Move best=bestMove(state,level);
    if(best==null) return state.eval();
    return best.getValue();
  }
  // 手の生成
  Move bestMove(State state){
    return bestMove(state,maxLevel);
  }
  // 可能な手がない時は0を返す
  Move bestMove(State state,int level){
    // 可能な手を全部生成する
    Vector moves=state.nextMoves();
    int size=moves.size();
    // 最良の手が複数あるのでそれを管理する
    Vector bestMoves=new Vector();
    Move best=null;
    // 最良の手の値を負の無限大に設定しておく
    double bestVal=Double.NEGATIVE_INFINITY;
    for(int i=0;i< size;i++){
      Move move=(Move)moves.elementAt(i);
      // 1手進めてみる
      TrailStack trail=state.doMove(move);
      // 評価値を計算
      // 1手進んだ時(相手の番)の評価値なので符号を入れ替える．
      double moveVal= -eval(state,level-1);
      // 戻す
      state.undoMove(trail);
      move.setValue(moveVal);
      // 評価値がこれまでのbestを超えた時
      if(bestVal< moveVal){
	bestVal=moveVal;
	bestMoves=new Vector();
	bestMoves.addElement(move);
      }
      // 評価値がこれまでのbestと同じ時
      else if(bestVal==moveVal){
	bestMoves.addElement(move);
      }
    }
    // bestの中から乱数で選択
    int bestSize=bestMoves.size();
    if(bestSize>0){
      int selected=(int)(Math.random()*bestSize);
      return (Move)bestMoves.elementAt(selected);
    }
    else return null;
  }
}

Alpha-Beta

//
// Alpha-beta Search
//
class AlphaBetaSearch extends Search {
  // どの深さまで読むか?
  int maxLevel;
  // インスタンス生成時にレベルを設定
  AlphaBetaSearch(int maxLevel){
    this.maxLevel=maxLevel;
  }
  // レベルに従った評価値
  // alpha
  double eval(State state,int level,double alpha, double beta){
    // 末端のレベルでは局面の評価値
    if(level==0) return state.eval();
    // そうでない時はベストの手の時の評価値
    Move best=bestMove(state,level,alpha,beta);
    if(best==null) return state.eval();
    return best.getValue();
  }
  // 手の生成
  Move bestMove(State state){
    return bestMove(state,maxLevel,Double.NEGATIVE_INFINITY,Double.POSITIVE_INFINITY);
  }
  // 可能な手がない時は0を返す
  // beta値よりも大きい値を返す場合は選ばれない
  Move bestMove(State state,int level,double alpha,double beta){
    // 可能な手を全部生成する
    Vector moves=state.nextMoves();
    int size=moves.size();
    // 最良の手が複数あるのでそれを管理する
    Vector bestMoves=new Vector();
    Move best=null;
    // 最良の手の値を負の無限大に設定しておく
    double bestVal=Double.NEGATIVE_INFINITY;
    for(int i=0;i< size;i++){
      Move move=(Move)moves.elementAt(i);
      // 1手進めてみる
      TrailStack trail=state.doMove(move);
      // 評価値を計算
      // 1手進んだ時(相手の番)の評価値なので符号を入れ替える．
      // bestValよりも小さい値は選ばれない
      double moveVal= -eval(state,level-1,-beta,-alpha);
      // 戻す
      state.undoMove(trail);
      move.setValue(moveVal);
      // 評価値がこれまでのbestを超えた時
      if(bestVal< moveVal){
        if(moveVal>beta){ // この木は刈られる
          return move;
        }
        bestVal=moveVal;
	if(bestVal>alpha) alpha=bestVal;
        bestMoves=new Vector();
        bestMoves.addElement(move);
      }
      // 評価値がこれまでのbestと同じ時
      else if(bestVal==moveVal){
        bestMoves.addElement(move);
      }
    }
    // bestの中から乱数で選択
    int bestSize=bestMoves.size();
    if(bestSize>0){
      int selected=(int)(Math.random()*bestSize);
      // return (Move)bestMoves.elementAt(selected);
      return (Move)bestMoves.elementAt(0);
    }
    else return null;
  }
}

次にオセロ固有の盤面，動きを定義してみる．

class OthelloMove extends Move{
  int position;
  OthelloMove(int position,double value){
    this.position=position;
    this.value=value;
  }
  OthelloMove(int position){
    this(position,0.0);
  }
  int getPosition(){ return position; }
  int getX(){ return position%10; }
  int getY(){ return position/10; }
}

class OthelloTrail extends TrailStack{
  int position;
  Vector reverses=new Vector();
  OthelloTrail(int position){
    this.position=position;
  }
  void add(int position){
    reverses.addElement(new Integer(position));
  }
  int getPosition(){ return position; }
  int size(){ return reverses.size(); }
  int get(int index){ return ((Integer)(reverses.get(index))).intValue(); }
}

class OthelloState extends State{
  static final int Black=1;
  static final int White=-1;
  static final int Empty=0;
  // 盤外かどうかのチェックをさぼるためには以下のチェックが有効
  static final int Edge=2;
  int board[]=new int[10*10];
  int turn;
  // 初期状態のオセロ盤を作る
  OthelloState(){
    int color;
    for(int x=0;x<10;x++)
      for(int y=0;y<10;y++){
	if(x==0 || x==9 || y==0 || y==9) color=Edge;
	else if((x==4 && y==4)||(x==5 && y==5)) color=White;
	else if((x==4 && y==5)||(x==5 && y==4)) color=Black;
	else color=Empty;
	board[y*10+x]=color;
      }
    turn=Black;
  }
  static final int directions[]={-11,-10,-9,-1,1,9,10,11};
  int opponent(int turn){ return -turn; }
  boolean checkDir(int position,int dir){
    //    System.out.println("checkDir("+position+","+dir+")");
    int oturn=opponent(turn),piece;
    if(board[position+dir]!=oturn) return false;
    for(position+=dir*2;(piece=board[position])==oturn;position+=dir){
      //      System.out.println("board["+position+"]="+piece);
    }
    //    System.out.println("lastpiece="+piece+",turn="+turn);
    if(piece!=turn) return false;
    return true;
  }
  void reverseDir(int position,int dir,OthelloTrail trail){
    int oturn=opponent(turn),piece;
    for(position+=dir;(piece=board[position])==oturn;position+=dir){
      board[position]=turn;
      trail.add(position);
    }
  }
  boolean legalMove(int position){
    if(board[position]!=Empty)return false;
    for(int dir=0;dir< directions.length;dir++)
      if(checkDir(position,directions[dir]))
	return true;
    return false;
  }
  boolean legalMove(int x, int y){
    if(1<=x && x<=8 && 1<=y && y<=8 && legalMove(y*10+x)) return true;
    return false;
  }
  Vector nextMoves(){
    Vector ret=new Vector();
    for(int x=1;x<9;x++)
      for(int y=1;y<9;y++){
	int position=y*10+x;
	if(legalMove(position))
	  ret.add(new OthelloMove(position));
      }
    return ret;
  }
  boolean canPass(){
    for(int x=1;x<9;x++)
      for(int y=1;y<9;y++){
	int position=y*10+x;
	if(legalMove(position)) return false;
      }
    return true;
  }
  void doPass(){
    turn=opponent(turn);
  }
  TrailStack doMove(Move gmove){
    OthelloMove move=(OthelloMove)gmove;
    int position=move.getPosition();
    OthelloTrail trail=new OthelloTrail(position);
    
    board[position]=turn;
    for(int dir=0;dir< directions.length;dir++)
      if(checkDir(position,directions[dir]))
	reverseDir(position,directions[dir],trail);
    turn=opponent(turn);
    return trail;
  }
  //
  void undoMove(TrailStack tstack){
    OthelloTrail trail=(OthelloTrail)tstack;
    board[trail.getPosition()]=Empty;
    int size=trail.size();
    for(int i=0;i< size;i++){
      int position=trail.get(i);
      board[position]=turn;
    }
    turn=opponent(turn);
  }
  // 一番単純な石の数による評価関数
  double eval(){
    int sum=0;
    for(int y=1;y<9;y++)
      for(int x=1;x<9;x++)
	sum+=board[y*10+x];
    return (double)(turn*sum);
  }
  int result(){
    int sum=0;
    for(int y=1;y<9;y++)
      for(int x=1;x<9;x++)
	sum+=board[y*10+x];
    return sum;
  }
  //
  void showState(){
    System.out.println(" abcdefgh");
    for(int y=1;y<9;y++){
      System.out.print(y);
      for(int x=1;x<9;x++){
	System.out.print("O.@".charAt(board[y*10+x]+1));
      }
      System.out.println();
    }
  }
}

評価関数も入れているところがあまりきれいでない．ユーザインタフェース部分は，今回は GUI を使わずにCUIでやってみる．

class HumanSearch extends Search {
  BufferedReader d;
  HumanSearch(BufferedReader d){
    this.d=d;
  }
  Move bestMove(State state){
    OthelloState game=(OthelloState)state;
    try{
      while(true){
	System.out.println("次の手を入力してください(例 d6)");
	String str=d.readLine();
	if(str.length()==2){
	  int x=" abcdefgh".indexOf(str.charAt(0));
	  int y=" 12345678".indexOf(str.charAt(1));
	  if(1<=x && x<=8 && 1<=y && y<=8){
	    if(game.legalMove(x,y)){
	      return new OthelloMove(y*10+x);
	    }
	    else
	      System.out.println(str+"の地点には打てません");
	  }
	  else
	    System.out.println("入力の形式が間違っています"+x+","+y);
	}
	else
	  System.out.println("入力は半角2文字でなくてはいけません");
      }
    }
    catch(IOException e){
    }
    return null;
  }
}

class Othello {
  OthelloState game;
  // コンストラクタ
  Othello(){
    game=new OthelloState();
  }
  // 終局
  void gameEnd(){
    int result=game.result();
    if(result>0)
      System.out.println("黒が"+result+"個勝ちました");
    else if(result<0)
      System.out.println("白が"+(-result)+"個勝ちました");
    else
      System.out.println("引き分けです");
    System.exit(0);
  }
  void startGame(Search players[]) throws IOException {
    for(int player=0;;player=1-player){
      game.showState();
      if(game.canPass()){
	game.doPass();
	if(game.canPass()){
	  gameEnd();
	}
	System.out.println("次に打てる手はないのでパスします");
      }
      else{
	int x,y;
	OthelloMove bestMove=(OthelloMove)players[player].bestMove(game);
	System.out.println("着手は "+
			   " abcdefgh".charAt(bestMove.getX())+
			   " 12345678".charAt(bestMove.getY())+"です．");
	System.out.println("評価値は "+bestMove.getValue()+"です．");
	game.doMove(bestMove);
      }
    }
  }
  // main関数
  public static void main(String args[]) throws IOException{
    Othello othello=new Othello();
    BufferedReader d=new BufferedReader(new InputStreamReader(System.in));
    Search players[]=new Search[2];
    players[0]=new HumanSearch(d);
    //    players[0]=new Depth1Search();
    //    players[0]=new MinMaxSearch(6);
    players[1]=new MinMaxSearch(6);
    othello.startGame(players);
  }
}

できあがったプログラム全体は，Othello.javaである．

弱いプログラムだが，簡単に書けて，探索アルゴリズムを変化させるのも容易にできる．

現在，一番強いとされるオセロプログラムは LOGISTELLO で，人間のチャンピオン相手に大差で勝つ実力を備えている．作者の Michael Buroは LOGISTELLO で使われている技術情報をかなり公開しているので，オセロプログラムを作る人には大変参考になるはずである．

次に進む