Unicode console I/O in Haskell on Windows Unicode console I/O in Haskell on Windows windows windows

Unicode console I/O in Haskell on Windows


I thought I would answer my own question, and list as one possible answer, the following, which is what I'm actually doing at the moment. It is quite possible that one can do better, which is why I'm asking the question! But I thought it would make sense to make the following available to people. It's basically a translation from Python to Haskell of this python workaround for the same issue. It uses 'option B' mentioned in the question.

The basic idea is that you create a module IOUtil.hs, with the following content, which you can import into your code:

{-# LANGUAGE ForeignFunctionInterface #-}{-# LANGUAGE CPP #-}{-# LANGUAGE NoImplicitPrelude #-}module IOUtil (  IOUtil.interact,  IOUtil.putChar, IOUtil.putStr, IOUtil.putStrLn, IOUtil.print,  IOUtil.getChar, IOUtil.getLine, IOUtil.getContents, IOUtil.readIO,  IOUtil.readLn,  ePutChar, ePutStr, ePutStrLn, ePrint,  trace, traceIO  ) where#ifdef mingw32_HOST_OSimport System.Win32.Types (BOOL, HANDLE, DWORD, LPDWORD, LPWSTR, LPCWSTR, LPVOID)import Foreign.C.Types (CWchar)import Foreignimport Prelude hiding (getContents, putStr, putStrLn) --(IO, Read, Show, String)--import qualified System.IOimport qualified System.IO (getContents)import System.IO hiding (getContents, putStr, putStrLn)import Data.Char (ord) {- <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>    HANDLE WINAPI GetStdHandle(DWORD nStdHandle);    returns INVALID_HANDLE_VALUE, NULL, or a valid handle -}foreign import stdcall unsafe "GetStdHandle" win32GetStdHandle :: DWORD -> IO (HANDLE)std_OUTPUT_HANDLE = -11 :: DWORD  -- all DWORD arithmetic is performed modulo 2^nstd_ERROR_HANDLE  = -12 :: DWORD {- <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>    DWORD WINAPI GetFileType(HANDLE hFile); -}foreign import stdcall unsafe "GetFileType" win32GetFileType :: HANDLE -> IO (DWORD)_FILE_TYPE_CHAR   = 0x0002 :: DWORD_FILE_TYPE_REMOTE = 0x8000 :: DWORD {- <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>    BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); -}foreign import stdcall unsafe "GetConsoleMode" win32GetConsoleMode :: HANDLE -> LPDWORD -> IO (BOOL)_INVALID_HANDLE_VALUE = (intPtrToPtr $ -1) :: HANDLEis_a_console :: HANDLE -> IO (Bool)is_a_console handle  = if (handle == _INVALID_HANDLE_VALUE) then return False      else do ft <- win32GetFileType handle              if ((ft .&. complement _FILE_TYPE_REMOTE) /= _FILE_TYPE_CHAR) then return False                else do ptr <- malloc                        cm  <- win32GetConsoleMode handle ptr                        free ptr                        return cmreal_stdout :: IO (Bool)real_stdout = is_a_console =<< win32GetStdHandle std_OUTPUT_HANDLEreal_stderr :: IO (Bool)real_stderr = is_a_console =<< win32GetStdHandle std_ERROR_HANDLE {- BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,                              LPDWORD lpCharsWritten, LPVOID lpReserved); -}foreign import stdcall unsafe "WriteConsoleW" win32WriteConsoleW  :: HANDLE -> LPWSTR -> DWORD -> LPDWORD -> LPVOID -> IO (BOOL)data ConsoleInfo = ConsoleInfo Int (Ptr CWchar) (Ptr DWORD) HANDLEwriteConsole :: ConsoleInfo -> [Char] -> IO ()writeConsole (ConsoleInfo bufsize buf written handle) string  = let fillbuf :: Int -> [Char] -> IO ()        fillbuf i [] = emptybuf buf i []        fillbuf i remain@(first:rest)          | i + 1 < bufsize && ordf <= 0xffff = do pokeElemOff buf i asWord                                                   fillbuf (i+1) rest          | i + 1 < bufsize && ordf >  0xffff = do pokeElemOff buf i word1                                                   pokeElemOff buf (i+1) word2                                                   fillbuf (i+2) rest          | otherwise                         = emptybuf buf i remain          where ordf   = ord first                asWord = fromInteger (toInteger ordf) :: CWchar                sub    = ordf - 0x10000                word1' = ((shiftR sub 10) .&. 0x3ff) + 0xD800                word2' = (sub .&. 0x3FF)             + 0xDC00                word1  = fromInteger . toInteger $ word1'                word2  = fromInteger . toInteger $ word2'        emptybuf :: (Ptr CWchar) -> Int -> [Char] -> IO ()        emptybuf _ 0 []     = return ()        emptybuf _ 0 remain = fillbuf 0 remain        emptybuf ptr nLeft remain          = do let nLeft'    = fromInteger . toInteger $ nLeft               ret          <- win32WriteConsoleW handle ptr nLeft' written nullPtr               nWritten     <- peek written               let nWritten' = fromInteger . toInteger $ nWritten               if ret && (nWritten > 0)                  then emptybuf (ptr `plusPtr` (nWritten' * szWChar)) (nLeft - nWritten') remain                  else fail "WriteConsoleW failed.\n"    in  fillbuf 0 stringszWChar = sizeOf (0 :: CWchar)makeConsoleInfo :: DWORD -> Handle -> IO (Either ConsoleInfo Handle)makeConsoleInfo nStdHandle fallback  = do handle     <- win32GetStdHandle nStdHandle       is_console <- is_a_console handle       let bufsize = 10000       if not is_console then return $ Right fallback         else do buf     <- mallocBytes (szWChar * bufsize)                 written <- malloc                 return . Left $ ConsoleInfo bufsize buf written handle{-# NOINLINE stdoutConsoleInfo #-}stdoutConsoleInfo :: Either ConsoleInfo HandlestdoutConsoleInfo = unsafePerformIO $ makeConsoleInfo std_OUTPUT_HANDLE stdout{-# NOINLINE stderrConsoleInfo #-}stderrConsoleInfo :: Either ConsoleInfo HandlestderrConsoleInfo = unsafePerformIO $ makeConsoleInfo std_ERROR_HANDLE stderrinteract     :: (String -> String) -> IO ()interact f   = do s <- getContents                  putStr (f s)conPutChar ci  = writeConsole ci . replicate 1conPutStr      = writeConsoleconPutStrLn ci = writeConsole ci . ( ++ "\n")putChar      :: Char -> IO ()putChar      = (either conPutChar  hPutChar ) stdoutConsoleInfoputStr       :: String -> IO ()putStr       = (either conPutStr   hPutStr  ) stdoutConsoleInfoputStrLn     :: String -> IO ()putStrLn     = (either conPutStrLn hPutStrLn) stdoutConsoleInfoprint        :: Show a => a -> IO ()print        = putStrLn . showgetChar      = System.IO.getChargetLine      = System.IO.getLinegetContents  = System.IO.getContentsreadIO       :: Read a => String -> IO areadIO       = System.IO.readIOreadLn       :: Read a => IO areadLn       = System.IO.readLnePutChar     :: Char -> IO ()ePutChar     = (either conPutChar  hPutChar ) stderrConsoleInfoePutStr     :: String -> IO ()ePutStr      = (either conPutStr   hPutStr  ) stderrConsoleInfoePutStrLn   :: String -> IO ()ePutStrLn    = (either conPutStrLn hPutStrLn) stderrConsoleInfoePrint       :: Show a => a -> IO ()ePrint       = ePutStrLn . show#elseimport qualified System.IOimport Prelude (IO, Read, Show, String)interact     = System.IO.interactputChar      = System.IO.putCharputStr       = System.IO.putStrputStrLn     = System.IO.putStrLngetChar      = System.IO.getChargetLine      = System.IO.getLinegetContents  = System.IO.getContentsePutChar     = System.IO.hPutChar System.IO.stderrePutStr      = System.IO.hPutStr System.IO.stderrePutStrLn    = System.IO.hPutStrLn System.IO.stderrprint        :: Show a => a -> IO ()print        = System.IO.printreadIO       :: Read a => String -> IO areadIO       = System.IO.readIOreadLn       :: Read a => IO areadLn       = System.IO.readLnePrint       :: Show a => a -> IO ()ePrint       = System.IO.hPrint System.IO.stderr#endiftrace :: String -> a -> atrace string expr = unsafePerformIO $ do    traceIO string    return exprtraceIO :: String -> IO ()traceIO = ePutStrLn

then, you use the I/O functions therein contained instead of the standard library ones. They will detect whether output is redirected; if not (i.e. if we're writing to a 'real' console) then we'll bypass the usual Haskell I/O functions and write directly to the win32 console using WriteConsoleW, the unicode-aware win32 console function. On non-windows platforms, conditional compilation means that the functions here just call the standard-library ones.

If you need to print to stderr, you should use (e.g.) ePutStrLn, not hPutStrLn stderr; we don't define a hPutStrLn. (Defining one is an exercise for the reader!)