#!/bin/bash
# Main script for turning collated molecule directories into dspace
# import packages.
#
# Usage: makeBatches.sh [starting mol idx] [Batch size] [# of batches]
#
# For each batch, the molecule directories with consecutive ids are copied
# from the sharded input tree (data/import2) into a fresh batch directory
# under data/batches/<UTC timestamp>/, ./extractMetadata.groovy is run on
# that directory, and the directory is archived as a .tar file next to it.
#
# All three arguments must be non-negative decimal integers. Paths are
# relative, so the script must be started from the directory that contains
# data/ and extractMetadata.groovy.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort.
usage() {
  printf 'Usage: %s [starting mol idx] [Batch size] [# of batches]\n' \
    "${0##*/}" >&2
  exit 2
}

(( $# >= 3 )) || usage
for arg in "$1" "$2" "$3"; do
  [[ "$arg" =~ ^[0-9]+$ ]] || usage
done

# Force base 10 so zero-padded ids such as 000123 or 08 are not parsed as
# octal by shell arithmetic.
strtId=$((10#$1))
batchSz=$((10#$2))
batchMax=$((10#$3))

dataIn=data/import2
# --iso-8601 is a GNU date option; fail loudly instead of writing into
# data/batches/ itself when it is unavailable.
stamp=$(date -u --iso-8601=seconds) || die "Cannot determine timestamp"
dataOut="data/batches/$stamp"

echo "Converting data from $dataIn to $dataOut"
mkdir -p -- "$dataOut" || die "Cannot create $dataOut"

for ((batch = 0; batch < batchMax; batch++)); do
  start=$((batch * batchSz + strtId))
  finish=$((start + batchSz - 1))
  echo "Batch #$batch will contain molecules with ids from $start to $finish"

  batchName="batch$start-$finish"
  batchdir="$dataOut/$batchName"
  mkdir -p -- "$batchdir" || die "Cannot create $batchdir"

  for ((mol = start; mol <= finish; mol++)); do
    # Zero-pad the id and keep its last six characters (ids above 999999
    # are therefore reduced to their last six digits).
    molName="000000$mol"
    padMolName=${molName: -6}
    # The input tree is sharded by the last, second-to-last and
    # third-to-last digit of the padded id.
    location="$dataIn/${padMolName:5:1}/${padMolName:4:1}/${padMolName:3:1}/$padMolName"
    # Progress marker: '.' for a molecule that exists, 'x' for a missing one.
    if [[ -e "$location" ]]; then
      cp -r -- "$location" "$batchdir"
      printf '.'
    else
      printf 'x'
    fi
  done
  printf '\n'
  echo "Data extracted for batch #$batch"

  # Metadata extraction stays best-effort: report a failure but still
  # archive the batch.
  ./extractMetadata.groovy "$batchdir" \
    || printf 'Warning: extractMetadata.groovy failed for %s\n' "$batchdir" >&2

  # Archive from inside the output directory so the tar members are relative
  # and the archive name contains no colon (GNU tar treats an archive name
  # with a colon as a remote host:file). The subshell keeps the caller's
  # working directory untouched, and a failed cd can no longer run tar in
  # the wrong place.
  (
    cd -- "$dataOut" || exit 1
    tar -cf "$batchName.tar" "$batchName"
  ) || die "Failed to archive $batchdir"
done