summary history branches tags files
sizer.hs
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}

import Control.Exception (Exception, IOException, try, throwIO)
import Data.Aeson qualified as Aeson
import Data.Aeson (ToJSON(..), FromJSON(..), Value(..), (.:), (.=), object)
import Data.Aeson.Types (prependFailure, typeMismatch)
import Data.Bits (shiftR)
import Data.List (isPrefixOf, isSuffixOf, stripPrefix)
import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Text (Text)
import GHC.Base (quotInt)
import Relude (foldMapM, sortWith, mapAccumL)
import System.Directory (doesDirectoryExist, listDirectory, pathIsSymbolicLink)
import System.IO (stderr, hPutStrLn)
import System.Posix.Files qualified as Posix
import System.Process.Typed (readProcess_, proc)
import System.ProgressBar (hNewProgressBar)
import System.ProgressBar qualified as ProgressBar
import Text.Printf (printf)

rootsBase :: FilePath
rootsBase = "/nix/var/nix/gcroots"

main :: IO ()
main = do
  gcrootPaths <- findRoots rootsBase
  hPutStrLn stderr "Scanning gcroots..."
  pb <- hNewProgressBar stderr ProgressBar.defStyle 10 (ProgressBar.Progress 0 (length gcrootPaths) ())
  gcroots <- catMaybes <$> mapM (collectAndLog pb) gcrootPaths
  let sorted = sortWith sortRootKey gcroots
  let (_, final) = mapAccumL analysisStep emptyAnalysisState sorted
  mapM_ printResult final
  where
    collectAndLog pb root =
      collectGcroot root
        <* ProgressBar.incProgress pb 1

    sortRootKey root =
      ( negate $ sortPriority $ gcrootPath root
      , negate $ totalSize $ gcrootDependencies root
      , gcrootPath root
      )

sortPriority :: FilePath -> Int
sortPriority root
  | isPrefixOf "current-system" root = 51
  | isPrefixOf "booted-system" root = 50
  | isPrefixOf "per-user/root" root = 40
  | isPrefixOf "per-user/" root && isSuffixOf "/current-home" root = 31
  | isPrefixOf "per-user/" root = 30
  | isPrefixOf "auto/" root = 0
  | otherwise = 10

findRoots :: FilePath -> IO [FilePath]
findRoots path = do
  isLink <- pathIsSymbolicLink path
  isDir <- doesDirectoryExist path
  case (isLink, isDir) of
    (True, _) -> return [ path ]
    (False, True) -> do
      all <- fmap ((path <> "/") <>) <$> listDirectory path
      foldMapM findRoots all

isBrokenLink :: FilePath -> IO Bool
isBrokenLink path = do
  isLink <- pathIsSymbolicLink path
  if isLink
    then do
      status <- try @IOException $ Posix.getFileStatus path
      case status of
        Left _ -> return True
        Right _ -> return False
    else
      return False

data Gcroot = Gcroot
  { gcrootPath :: FilePath
  , gcrootDependencies :: [PathInfo]
  }

collectGcroot :: FilePath -> IO (Maybe Gcroot)
collectGcroot gcroot = do
  isBroken <- isBrokenLink gcroot
  if isBroken
    then
      return Nothing
    else do
      (json, _) <- readProcess_ $ proc "nix" [ "--extra-experimental-features", "nix-command"
                                            , "path-info"
                                            , "--recursive"
                                            , "--size"
                                            , "--json"
                                            , gcroot
                                            ]
      let pathInfos_ = Aeson.eitherDecode json :: Either String [PathInfo]
      case pathInfos_ of
        Left err ->
          throwIO $ PathInfoException err
        Right pathInfos ->
          return $ Just $ Gcroot (fromMaybe gcroot $ stripPrefix (rootsBase <> "/") gcroot) pathInfos

data PathInfoException =
  PathInfoException String
  deriving (Show)

instance Exception PathInfoException

printResult :: (Gcroot, AnalysisResult) -> IO ()
printResult (gcroot, result) = do
  printf "%7s  %7s  %s\n"
    (formatFileSize $ resultTotalSize result)
    (formatFileSize $ resultUnseenSize result)
    (gcrootPath gcroot)

totalSize :: (Foldable f, Functor f) => f PathInfo -> Int
totalSize = sum . fmap pathInfoNarSize

formatFileSize :: Int -> String
formatFileSize s
  | s >= 10*(2^50) = show (shiftR s 50) ++ "P"
  | s >= 10*(2^40) = show (shiftR s 40) ++ "T"
  | s >= 10*(2^30) = show (shiftR s 30) ++ "G"
  | s >= 10*(2^20) = show (shiftR s 20) ++ "M"
  | s >= 10*(2^10) = show (shiftR s 10) ++ "k"
  | otherwise = show s

data AnalysisState = AnalysisState
  { analysisStateSeen :: Set Text
  }

emptyAnalysisState :: AnalysisState
emptyAnalysisState =
  AnalysisState mempty

data AnalysisResult = AnalysisResult
  { resultTotalSize :: Int
  , resultUnseenSize :: Int
  }

analysisStep :: AnalysisState -> Gcroot -> (AnalysisState, (Gcroot, AnalysisResult))
analysisStep (AnalysisState seen) root =
  let
    dependencies :: [PathInfo]
    dependencies = gcrootDependencies root

    unseenDependencies :: [PathInfo]
    unseenDependencies =
      filter (flip Set.notMember seen . pathInfoPath) dependencies
  in
  ( AnalysisState (seen <> Set.fromList (fmap pathInfoPath unseenDependencies))
  , ( root
    , AnalysisResult
        { resultTotalSize = totalSize dependencies
        , resultUnseenSize = totalSize unseenDependencies
        }
    )
  )
  where
    toSeenEntry pathInfo =
      (pathInfoPath pathInfo, pathInfoNarSize pathInfo)

data PathInfo = PathInfo
  { pathInfoNarSize :: Int
  , pathInfoPath :: Text
  } deriving (Show, Eq)

instance FromJSON PathInfo where
  parseJSON (Object v) = do
    pathInfoNarSize <- v .: "narSize"
    pathInfoPath <- v .: "path"
    pure $ PathInfo{..}
  parseJSON invalid = do
    prependFailure "parsing PathInfo failed, "
      (typeMismatch "Object" invalid)