Summary: 1397 instances, 1225 unique

Count  Text
    2  // TODO replace by more performant version directly on GPU that does not require the round-trip over CPU.
    1  // TODO assert only
    1  _unknown_shape = (-2,) # TODO: take this from the catacombs of cntk_py
    1  // TODO: Remove this version (with sanity checks) after this has been tested. Then the function can be inlined above.
    1  // TODO: This could be more easily implemented as a unary operation, like PassNode.
    2  // TODO: specific to LSTM. icfo (CNTK) to iofc(ONNX)
    1  # TODO: This API is still suboptimal, and should be fixed as follows:
    1  # TODO: remove the parallel application; instead
    1  // TODO: if there is already a file, rename it
    1  // TODO: Copy Constructor
    1  # TODO: validate parameter values.
    1  // TODO: Document what this is. It seems we can fill specific hard-coded inputs with something interesting.
    1  // Currently, this would cause a matrix/tensor dimension mismatch. --TODO: Is this comment up-to-date?
    1  sliceInputGrad, // (out) gradient for data input goes here --TODO: Check if cudnn engine adds the gradient, or just overwrites (BUGBUG). CNTK engine is OK.
    1  // TODO: when gcc -v is 4.9 or greater, this should be: regex_replace((wstring)ppath, wregex(L"\\.[^\\.\\\\/:]*$"), wstring());
    2  // TODO: what about double?
    1  //TODO: According to my preliminary analysis, the second momentum variance scaling is different from momentum scaling; need to double check -- yuqing tang
    1  // TODO: We need a better way to determine the ElementType for the network
    2  // TODO: enable UVA p2p access once this is fixed.
    1  argScope->Add(id, failfn, move(argVal)); // TODO: is the failfn the right one?
    1  // inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
    1  // dynamic loading of modules --TODO: not Basics, should move to its own header
    1  // TODO: ^^ change to shared_ptr or unique_ptr
    3  // TODO:
    1  // TODO: support transpose product
    1  // TODO: This is currently being hardcoded to unsigned short for saving space, which means untied context-dependent phones
    1  // TODO: can this be static?
    1  // Parse - Parse the data --TODO: is this doing the whole file or incrementally?
    1  reduction_rank=1, # (0 means input has no depth dimension, e.g. audio signal or B&W image) --TODO: call it item_rank?
    1  /*TODO: merge with call site*/ void ForwardPropS(Matrix& invNorm0, Matrix& invNorm1, Matrix& functionValues, Matrix& in0, Matrix& in1, Matrix& in2, Matrix& in3, Matrix& leftTermTemp, Matrix& rightTermTemp)
    1  // TODO: Move this out. Follow the same pattern as the RNN node. But can't without requiring another buffer.
    1  #pragma optimize("", off) // TODO work around potential VS2015 code optimization bug, replacing virtual- by non-virtual call in Init() below
    1  } // TODO: ain't there an overload for this?
    1  if (!m_pMBLayout) // TODO: temporary workaround to Check_t() calls which call this. TODO: Delete the first arg from Check_t() after memshare merge.
    1  // TODO: This is a little weird. Rather, this should be done by the call site.
    1  // TODO: merge this with todouble(const char*) above
    1  // TODO: do progressWriters below also have a similar issue?
    1  // TODO: decrease randomization window if m_deserializers.size() > 1 ?
    1  # TODO: Let's switch to import logging in the future instead of print. [ebarsoum]
    1  double totalfwscore = 0; // TODO: name no longer precise in sMBRmode
    2  // TODO: This is a stopgap. Is this the right thing to do? It changes the matrix type in-place.
    1  return true; // TODO: what's this return value for?
    1  // TODO: can ConfigRecordPtr be IConfigRecordPtr?
    1  alignoffsets[L.edges.size()] = (unsigned int) alignbufsize; // (TODO: remove if not actually needed)
    1  /// TODO: Currently chunk->SequenceInfos() == deserializer->SequenceInfo(chunk),
    1  typedef unsigned int LabelIdType; // input token mapped to an integer --TODO: why not size_t? Does this save space?
    2  // TODO: this may go away if we store classids directly in the utterance data
    2  #ifndef _MSC_VER // TODO: what is the correct trigger for gcc?
    1  // TODO: Check the CNTK Book why different left and right extents are not supported.
    1  for (const auto& node : GetEvalOrder(rootNode)) // TODO: verify that no use of this requires the actual eval order, then change to GetAllNodesForRoot()
    1  # TODO figure out a better/faster way.
    1  // TODO: This should keep everything that is configured by the config.
    1  if (std::find(iter->m_nestedNodes.begin(), iter->m_nestedNodes.end(), node) != iter->m_nestedNodes.end()) // TODO: should this loop need to be a method of SEQTraversalFlowControlNode?
    1  // TODO: Support multiple concurrent backprop states
    1  // TODO: We could copy the IConfigRecordPtr. That is allowed. Not trivial to do with template magic.
    1  Eframescorrectbuf; // TODO: remove this [v-hansu]
    1  pos++; // consume it --TODO: value is not used after this
    1  void SaveCheckPointInfo(const size_t epoch, const size_t totalSamplesSeen, // TODO: combine totalSamplesSeen and prevCriterion into a EpochCriterion type
    1  // TODO: get H1
    1  // TODO: Why can we not just pass m_prefetchMatrices?
    2  # TODO: We still have those axes in the kernel. Solve this once the C++ implementation supports 1D directly.
    1  virtual bool CanReadFor(wstring /* nodeName */) // return true if this reader can output for a node with name nodeName --TODO: const wstring&
    1  // TODO: a better way to get hash value?
    1  ElemType sum = 0; // TODO: Do this in 'double'?
    1  MapAndUpdateONNXType(onnxOpName, false, 0, output.GetDataType(), &outputArgType); // TODO: Is this needed? Probably not.
    1  std::vector backptroffsets; // TODO: we could change this to 'unsigned int' to save some transfer time
    1  // TODO: Except X, all other inputs to GRU are treated as constant.
    1  // TODO: Once we do nested loops, then the FrameRange argument to this will refer to the outer loop.
    1  // TODO generalize those for ReaderLib / Reader / CNTK
    1  // TODO: in forward, we don't actually care if we propagate into a gap; could avoid a few unnecessary conditional copies towards the end
    1  // TODO: Allow to reduce only over a single dimension, or a subset.
    1  //TODO for very far future: Handle reduction on (multiple) sequences all in once: sequenceAxesToReduce
    1  // TODO: rethink whether this is correct for example of negative strides
    1  // TODO: Are we sharing memory correctly? (no big deal as these are small; yet would be nice)
    1  // TODO: this is not threadsafe.
    1  if (features.size() > 1) // TODO: If this ever fails, please remove this check. One sample had 2 sections, but I could not see where they were used; this check is to verify that.
    2  // TODO: This is sort of redundant now--it gets the symmap from the HMM, i.e. always the same for all archives.
    1  // TODO: add support for sparse.
    1  // TODO: move these to Exports.cpp
    1  // TODO: what about double?
    1  // TODO: unify with ComputationNodeBase
    1  // TODO: core CNTK. They are format agnostic and can be used with any type of
    1  StreamMinibatchInputs* inputMatrices, // TODO: why is this a pointer?
    1  bool SequenceReader::CheckIdFromLabel(const std::string& labelValue, const LabelInfo& labelInfo, unsigned/*TODO: LabelIdType?*/& labelId)
    1  Matrix& AssignVectorNorm1Of(Matrix& a, const bool isColWise); // TODO: arg should be const
    1  labels->push_back(std::move(vstr[i])); // TODO: is this an entire sequence, or multiple columns describing a single token?
    1  // TODO: move the two-forward-pass support out of the reader.
    1  # TODO have a better name for combine() in this case
    1  // TODO: let's deprecate this eventually and just use "type"...
    1  //// TODO: to skip a batch/sequence pack/uppack, we need
    1  bool BatchLUSequenceReader::CanReadFor(wstring nodeName) // TODO: const wstring &
    1  // TODO: Switch to a vector instead of an unordered_map
    1  // TODO: we seem to already use TensorView, so this thing may no longer be needed. Too scary to remove.
    1  atomicAdd(&resultValues[IDX2C(lhsRow, resultCol, numRowsLhs)], lhsValue * rhsVal); //TODO: this does not work with fp16 for sparse embedding
    1  // TODO: Remove call to StartEpoch - this API is legacy.
    1  // TODO: This should be done in the same manner for CNTK exe as well.
    1  // TODO: This should not need to be called in case of wasDataRead == false, since in that case, returned values are invalid.
    2  // TODO: update allocated range
    1  # TODO: add a test case for passing a numpy array as initial values
    1  // TODO: actually, we can start reducing all cpu values first, and then wait for the gpu->cpu transfer to finish.
    5  // TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)
    1  shapes[i].FlattenInPlace(k); // TODO: overdoing the immutable thingy much?
    1  // TODO: following commented out attributes are not supported. Use default.
    1  // TODO: test this
    1  # TODO: bring this back once we have a design for name-accessible .outputs etc.
    1  // TODO: to figure out sparse matrix size
    1  // TODO: use EqualCI(), and use camelCase, e.g. classLSTM
    1  // TODO: They are not really temporally ordered, so a better way would be to use tensors, once that is ready.
    1  // TODO: do we need all these 'this->'?
    1  // TODO: ^^ Is that correct? Should we use a tensor here, TensorShape(rows0, rows1)?
    1  double m_fsSmoothingWeight; // frame-sequence criterion interpolation weight --TODO: can this be done outside?
    1  // ... TODO: handle long NT paths
    1  // TODO: move 'PrepareNewSweepIfNeeded' inside the sequence randomizer and drop this requirement.
    1  MapAndUpdateONNXType(onnxOpName, true, inputIndex, input.GetDataType(), &inputArgType); // TODO: Is this needed? Probably not.
    1  // TODO: Can be simplified if we only randomized sequences forward.
    1  // inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
    1  using namespace Microsoft::MSR::CNTK; // TODO: we should not have this in a header
    1  // TODO: this code is needed for ONNX converter because ONNX requires squeeze axis. However, unit test failed with this code.
    1  /*TODO: merge with call site*/ void BackpropToRight(Matrix& inputFunctionValues, Matrix& inputGradientValues, Matrix& gradientValues)
    1  // TODO: Waiting Skype smart reply with attention model before enabling the functionality of tracking sequence dimension.
    1  // TODO: use a pool of buffers instead of allocating a new one, each time a chunk is read.
    1  // TODO: UniqueNodeNameStorage is causing model validation failure.
    1  // FIXME ignore is ignored
    1  // TODO: the file's name is too general to be included from outside; MathHelpers.h?
    2  // ... TODO: does the above actually do anything/significant? nfwd != targetstep?
    1  elif not isinstance(batch, list): # TODO: allow general iterables
    1  // TODO: use static LogAdd() as defined in TensorOps.h
    1  // TODO: Check why l2Factor is not applied to L1. Bug?
    1  double freq = q.top().second; // TODO: why double?
    2  // TODO add back special cases
    1  // TODO: this should be a bulk operation; this implementation is a quick hack
    1  // TODO: When running in parallel with MPI, only commands in 'commandstoRunOnAllRanks' should
    1  // TODO: Actually this should never be needed, because each time dimensions change, we init.
    1  // TODO: change this to variadic templates, then we can instantiate everything we need through this
    1  // TODO: handle all cases
    1  //TODO: Consider using a vector/unique_ptr here to avoid potential memory leaks
    1  #else // TODO: remove this once we got this figured out
    1  // TODO: Should not throw if std::uncaught_exception()
    1  // TODO: Once we do in-place, the above must include a copy-to-self check (pay special attention to adding vs. copying).
    1  // m_pMBLayout->SetWithoutOr(uttPos, timePos, MinibatchPackingFlags::SequenceStart); // TODO: can we use Set() (with OR)?
    1  if (word4idx.size() != nwords) // TODO: Why not infer it at this point in time? If labelInfo.dim == 0 then set if to word4idx.size()
    1  /*TODO: merge with call site*/ void BackpropToRight(Matrix& leftDivRight,
    1  // TODO: is it guaranteed that the GPU is already completed at this point, is it safe to overwrite the buffers?
    1  // TODO: should we just drop this flag and return false if we cannot fulfil this request?
    1  // TODO: investigate whether this is really needed.
    1  // TODO: Encapsulate (freq, firstToWrite) as an update schedule type.
    1  // TODO: also allow ... syntax, where ... refers to the directory of the enclosing file
    1  // TODO: how does the file distinguish float from double?
    1  // TODO: We could do something more interesting with tensors.
    1  double expAvgFactor, // TODO why not ElemType? same for the other parameters, functions?
    1  // TODO: We should verify that indeed this node is not referenced by other nodes or node groups,
    1  // TODO: Can this just exist inside SGD.cpp?
    1  InputRef(1).InvalidateMissingGradientColumns(fr); // TODO: This should not be necessary.
    3  // TODO: Does it make sense to parallelize this?
    1  m_lookupTableOrder = config("lookupTableOrder", "0"); // TODO: What is this?
    1  for (size_t k = 0; k < m_dims.size(); k++) // (TODO: we can save one multiplication here)
    1  // TODO: bias is at src->Inputs()[1] (inputs[1]) for New Conv case.
    1  //TODO: Set proper format on matrices?
    1  // TODO: we already have EqualCI() in Basics.h which does the same thing.
    1  #pragma warning(disable : 4996) // Caused by the TODO below (line ~1280)
    1  return (long) lineCount; // TODO: change to size_t
    1  // TODO: All RequireSizeAndAllocate should be async and use a transferer.
    1  const double& hsmoothingWeight, // TODO: Why are all these passed by reference?
    1  #else // TODO: test this
    1  //TODO: Need to record the original rate and the reference mbsize so that the unit gain factor can be computed correctly.
    1  // TODO: make function value sparse?
    1  // TODO: compressed_sparse_csc = 2, // indices are encoded as var-ints
    1  // TODO: It does not make sense to run LegacyReshapeNode frame-by-frame inside a loop, because it changes the time base.
    1  // TODO: don't we need to destroy ourselves?
    1  // TODO \r should be handled elsewhere; refine this
    1  // TODO: Some of these accessors should be merged into single methods like SetBuffer.
    1  // TODO: Clarify/resolve the semantic overlap between BeginForwardProp() and UpdateFunctionMBSize().
    1  // TODO: this needs to be refactored to get rid of all statics
    1  # TODO: ^^ is this still necessary? Or is this a sanitize() call we need here?
    1  if (newNode->NodeName() != nodeName) // TODO: This was not tested for earlier; I hope no code depends on this.
    1  vector m_ioNames; // TODO: why are these needed, why not loop over m_dataReaders?
    1  // TODO: the sum of sizes does not account for a possible gap before the sequence offset.
    1  // TODO: Once we do in-place, the above must include a copy-to-self check (either here or inside the tensor lib).
    1  if (numframes != classids.size()) // TODO: remove this once we are confident
    1  std::vector transPs; // the transition matrices --TODO: finish this
    1  // TODO: it may be slow to rely on TestMinibatch to return error each time, since it may require transfer
    1  // TODO: further opportunity for speed-up: use 'mean' from last round for 1-bit and stddev calc
    1  // TODO: This is called somewhat inconsistently, sometimes with a=*this, sometimes with b=*this.
    1  size_t mMaxSentenceLength; // max over mSentenceLength[] --TODO: why not compute on the fly?
    1  // This function does not quite fit here, but it fits elsewhere even worse. TODO: change to use File class!
    2  // TODO: remove all formats that are actually not supported
    1  // TODO: Handle these cases:
    1  // TODO: What the hell is this?
    1  for (int i = startEpoch; i < (int) m_maxEpochs; i++) // TODO: why is this an int, and not a size_t?
    1  // TODO: If we have a truncated-BPTT state then verify that the sequence indices match with m_state->m_sequences, and the tensor dimensions.
    1  Matrix slicePrior = DataFor(*m_prior, fr); // TODO: use the right MBLayout, then we won't need the special case
    1  // TODO: this view change only accomidates this request
    1  let firstIsDouble = firstVal.Is(); // TODO: make this a std::array?
    1  # TODO: should we allow to pass fixed weights instead? Like for Embedding? E.g. audio filters
    1  # TODO: ComputeInputPerDimMeansAndInvStdDevs
    1  // TODO: this is experimental code to load Facebook Caffe models.
    1  // TODO: use GetMinibatchIntoNetwork().
    1  // TODO: leverage sparse if the original NDArrayView is in spase.
    1  # TODO __xor__, __rxor__, __pow__, __rpow__, __invert__
    1  # TODO: Add direct conversion, since creating an intermediate array might be slow
    2  // ... TODO: should this be moved into the base class? no need for separate type, just have a stripe() function just like col()
    1  // TODO: This currently only supports nodes of the same ElemType. We could allow conversion operators.
    1  // TODO: Adapt this to new MBLayout, m_sequences would be easier to work off.
    1  // TODO: to copy other variables used only for class based LM
    1  // TODO: We should directly pass the actual input gradient tensor to the Backward method
    1  // TODO: move this to class File as well
    1  // TODO: for now all param cases are for free dimension
    1  #if 0 // TODO: change design to keep the #frames in the TOC, so we can check for mismatches before entering the training iteration
    1  return half(powf((float)v , (float)e)); //TODO: Improve efficiency?
    1  // TODO: We currently only support external nodes that cannot be part of CNTK recurrent loops
    1  // ^^ TODO: we can merge these two
    1  // TODO: We could implement an overlay IConfigRecord implementation that fakes the two values that are being added to the interface.
    1  // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
    1  // cudaarrayref logLLsarray; // TODO: pass this in, of course
    1  // pass on some global option --TODO: Why is this not done inside each reader??
    4  // This is a weird interface, as it makes also sense for a matrix. TODO: Fix this.
    1  tensor = Tensor[-2] # TODO: find the correct symbol for the sentinel value
    1  // TODO: sanity check and use records as a clue of how big to make it
    1  // TODO: Don't use m_inputValues, traverse ourselves, to remove dependency on FormEvalOrder().
    1  // TODO: test whether that is true
    2  // TODO: get from an env variable
    2  // TODO: This should be a method of ComputationNetwork, not ComputationNode.
    1  // TODO: no longer used, remove this. 'transcript' parameter is no longer used in this function.
    1  const FrameRange *parent; // or NULL: parent range, relative to which this FrameRange is interpreted --TODO: not used yet
    1  // ...TODO: eat trailing space like fscanf() does (surrounding space)
    1  // TODO: Can this just be a typedef?
    1  // TODO diagnostics for paged out chunks?
    1  ifstream fp(inputFile.c_str()); // TODO: use class File, as to support pipes
    1  fprintf(stderr, (", %2." + to_string(mbProgNumPrecision) + "f%%").c_str(), mbProg * 100); // --TODO: use a * format?
    1  // TODO: should we deallocate in opposite order?
    1  RowStackNode(DEVICEID_TYPE deviceId, const wstring& name, int spliceDim = 1/*TODO: complete this*/)
    1  // TODO: Or should we add an additional dimension?
    1  // TODO: overload the << and >> operators for serializing TensorShape
    1  // TODO: Except X, all other inputs to LSTM are treated as constant.
    1  Matrix funcVal = Value(); // TODO: This just creates a 1x1 matrix set to 0.
    1  // TODO: in case of the chunk-based randomization window, we couldn't care less
    1  // TODO: Add() does not yet correctly handle the failfn. It is meant to flag the location of the variable identifier
    1  // TODO: mbSize and truncated should be specified differently for truncated BPTT:
    1  // TODO: This function actually consumes the white-space characters. Document that behavior.
    1  operator std::string() const { return *this; } // TODO: does not seem to work
    1  if axis.name == "UnknownAxes": # TODO: what is the correct way of testing this?
    1  // note on exprPath: since ! has only one argument, we do not include it in the expressionPath --TODO: comment correct?
    1  else if (e->op == L"**") InvalidInfixOpTypes(e); // TODO: implement this
    5  // TODO:
    1  // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
    2  // ... TODO: this can be a 'static', as it should only be set during foreach_node but not outside
    5  // TODO: Check if this is a derived type and throw an exception in that case
    1  // TODO: batch shall be one?
    1  # TODO: remove default resolution, only make this a conversion; then rename
    1  /*TODO: merge with call site*/ void BackpropToLeft(Matrix& inputFunctionValues, Matrix& inputGradientValues, Matrix& gradientValues)
    1  // TODO: Would be more direct to fold this into the table below as well.
    1  // NV_TODO: optimize speed -- pass things needed in, optimize kernel speed, add half2
    1  &matrices.GetInputMatrix(iter->first)); // TODO: use a reference instead of a ptr
    1  // TODO: use volatile* and then we can skip the __syncthreads() for the last 32 values. See Amit's allreduce() function implementation in MatrixQuantizer_kernel.cu.
    1  #if 1 // TODO: test whether this works correctly w.r.t. typecasting
    1  // TODO: consider swapping the arguments in this case
    1  // TODO batchSize == 1
    1  // TODO: Why not fail here?
    1  # TODO: -Werror # Treat all warnings as errors.
    1  // TODO: Should we use a proper priority_queue?
    5  // TODO: sanity check for all variables to have the same shape and data types.
    1  # TODO figure out how to pass infty to C++ in a portable way
    1  # TODO: support more types.
    1  void UCIParser::ParseInit(LPCWSTR/*TODO: change to C++ type*/ fileName, size_t startFeatures, size_t dimFeatures, size_t startLabels, size_t dimLabels, size_t bufferSize, size_t startPosition)
    1  public: // TODO: make private again once
    1  // (TODO: We could force-evaluate the boundary input here.)
    1  // TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
    1  // TODO: No, return all; and leave it to caller to redistribute them [Zhijie Yan]
    1  // TODO: Can probably be faster by using the sequence array directly.
    1  // TODO: deserializers and transformers will be dynamically loaded
    1  # FIXME: seq_starts
    1  - elementwise nonlinearities as usual [TODO: complete them]
    1  int nbrCls = config(L"nbrClass", "0"); // TODO: why int and not size_t?
    1  // TODO: make this independent of ElemType. Then these repeated dynamic_pointer_casts will go away
    1  # TODO: we should always use 'shape' unless it is always rank-1 or a single rank's dimension
    1  # TODO sanitizing should be removed once Swig's typemaps are in place
    1  // Implements an algorithm by Mikolov --TODO: get the reference
    1  // TODO: We should do this right after the GetMinibatch() call, since that's where these changed.
    1  // TODO: make this a runtime parameter.
    1  // TODO: replace this copy with an alias to value.
    1  # TODO: map_rank is broken. We should specify the #slowest-changing axes. E.g. 1 would work for images and vectors. Requires C++ change.
    1  # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    1  // TODO: Is there more than nodes that we want to return? Node groups? deviceId?
    1  // TODO: check the exponent value (see FLT_[MAX/MIN]_10_EXP).
    1  m_temp->AssignElementDivisionOf(*m_temp, *m_result); // TODO: this is in-place--does this function allow that?
    1  // TODO: currently we only support one node for regularization
   17  // TODO: reference any additional headers you need in STDAFX.H
    1  // TODO: This should be a private but if not made public, the python bindings build complains about an unresolved external
    1  // TODO: Do not cache this before reordering; get list & pass to FormRecurrentLoops() which reorders it, then store it (such that GetEvalOrder(nullptr) is always valid w.r.t. loops).
    1  // This is meant to be used by FormRecurrentLoops(). TODO: Hopefully this can be not done anymore some day.
    1  // TODO: not very nice--need to fix way more outside to get this right
    1  //// TODO: make bidirectional RNN work by figuring out output data
    1  // TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
    1  // TODO: should be deprecated.
    2  return; // ... TODO: tell parallel_for_on_each_numa_node() to only have one step, or parallelize
    1  #include "MPIWrapper.h" // TODO: does not belong here
    1  // TODO: turn Print into PrintF; e.g. PrintF provides 'format' arg. Printf('solution to %s is %d', 'question' :: 42)
    1  // TODO: We should be able to move instead of copy but it currently isn't straightforward
    1  // TODO: This kernel has very poor performace and needs to
    1  // TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
    1  // TODO: Why are all these static, but then take a network as the first argument? --> make them class members
    1  # TODO: Is this confusing w.r.t. tuple which is parallel and list which is sequential?
    1  // TODO: Should do more perf tests before unifying these two.
    1  # for kw in kwargs: # TODO: only allow one arg
    1  // TODO: make Java binding deal with double*, float * and int * correctly.
    1  # TODO nitpick_ignore
    1  Matrix& labels = matrices.GetInputMatrix(m_labelsName); // will change this part later. TODO: How?
    1  // TODO: modify file format to know this; then eliminate the dependency (and in some future, allow nodes to be different)
    1  mutable int m_devicesTransferedTo[2]; // TODO: what is this for? Seems only diagnostics
    2  // TODO: can we just define it as private without implementation?
    1  // bool m_needToNormalizeLRByParallUtterance; // TODO: should go away
    1  TensorShape tensorShape = GetSampleLayout(); // TODO: Do we need to expect this tensor to have arbitrary strides? In case it came out of a Slice, Reshape, or Transpose op in-place?
    1  // TODO: left-over of Linux compat, can be done nicer
    1  // TODO: original code did not call this for actualMBSize == 0
    1  // TODO: apdapt e2e tests to run with aggregator of type ElemType.
    1  // TODO: can return value be negative? If not, use size_t
    3  CreateCurandObject(seed, __FUNCTION__); // TODO call ResetCurandObject() instead?
    1  // TODO: Currently we only support concatenation of inputs of the same size.
    1  // TODO: switch to char when possible.
    1  // TODO: member variables go to the end
    1  #include "ssematrix.h" // TODO: why can it not be removed for Windows as well? At least needs a comment here.
    1  // TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
    1  // TODO: There are too many of these. This indicates improper class hierarchies.
    1  RequireSizeAndAllocate(v.GetNumRows(), v.GetNumCols(), v.NzCount() ); // TODO: rename to *Bytes/*Count instead of vague *Size if possible
    1  } // TODO: really? Return a reference to a local? TODO: change to const? and call it GetEvalMode()
    1  // TODO: There is a lot of duplication between this function and the NDL version.
    1  static void SetDevice(DEVICEID_TYPE deviceId); // TODO: unify with PrepareDevice()
    1  // TODO: should we allow paths relative to TOC file?
    1  // TODO: change to taking a regular format string and a :: array of args that are checked. Support d,e,f,g,x,c,s (s also for ToString()).
    1  // TODO: This clips the divisor by a small value. Is that really what one would want?
    1  #ifdef _MSC_VER // make some old configurations reproducable (m_cacheBlockSize used to be a constant) --TODO: remove in a few months
    1  // TODO: Variable hash should be based on uid.
    1  // TODO (this is still unfinished):
    1  // TODO: Should this be done in SGD::Adapt()?
    1  // TODO: We need ternary ops where the output storage is separate.
    1  // TODO: This is duplicated in GPUMatrix.cu
    1  // TODO: better precision (at the moment we're at parity with UCIFast)?
    1  // TODO: Most of these are reduce nodes that output a single number, no MBLayout. Maybe abstract those out further
    1  // TODO: RegexReplace()
    1  // TODO: make type casts members of the SparseSequenceData
    1  // TODO: add TryRename that does not throw.
    2  // TODO: I find that HVite emits redundant physical triphones, and even HHEd seems so (in .tying file).
    1  //TODO: Preliminary study shows that the unitgain factor should use the raw momentum instead of the scaled momentum as the following:
    1  // This will drag along the gaps as well, hence we mask them to zero above. --TODO : this is not optimal.
    1  rem: TODO: add check whether javac/jar exist.
    1  takesBool = true; operationName = L"ElementTimes"; // implemented as element product --TODO: needs a C++ node
    1  // TODO: Can we optimize this and only copy if there is a sequence spanning across the end of the MB? And add a check to BeginForwardProp() to make sure we got one if there is a boundary at the start?
    1  // TODO: share stuff with MeanNode
    1  if (!p) // TODO: can we make this look the same as TypeExpected in BrainScriptEvaluator.cpp? We'd need the type name
    1  // TODO: Support transferring the quantization output to a quantized matrix on the GPU
    1  // TODO: Lift this into config language, move underlying code to math lib. This should be a model-editing operation.
    1  // TODO: reuse loaded sequences instead of creating new ones!
    1  // TODO: should this be a function template?
    1  // TODO: Would this lend itself to a unique_ptr instead of the init flag?
    1  // TODO: Do we really need these? Folks who want to use C++ can instead say net->AddNodeToNet(New<>(...)), which is not that different.
    1  // TODO: Variable equality should be based on uids.
    2  // ... TODO: honor ppl_cores == 1 for comparative measurements against single threads.
    1  // TODO: This definition is poor; we should use a different node name, and specify the factor directly.
    1  net->AllocateAllMatrices(evaluationNodes, additionalNodesToEvaluate, criterionNodes[0]); // TODO: use criterionNodes.front() throughout
    1  // TODO:
    1  // TODO: two lines above should be changed as follows:
    1  {L">>", 5}, {L"<<", 5}, // TODO: do it as other languages
    1  # TODO: consider using cntk.core.Value.one_hot here.
    1  # TODO: if reduction_rank==0 and sequential, we don't need the fake reduction axis, just use the sequential axis instead
    1  # TODO: running_count should be right after running_inv_std; no need for upwards compat
    1  // TODO: Pad op is not intuitative or it could be a bug. One would think begins before end.
    1  #if 0 // TODO: move to separate header file numahelpers.h
    1  // TODO: similar to DumpInfo; used by ExperimentalNetworkBuilder test implementation
    1  // TODO: add Resize function.
    1  // TODO: rename to ConvolutionGeometry
    1  // TODO: Possibly set m_valid to false, but currently preserving the old behavior.
    1  // TODO: ^^ This should depend on the sequences themselves.
    2  // TODO: same for BatchNorm
    2  foreach_index (i, allchunks) // TODO: this cries for iterating using the iterator!
    1  // TODO: Unify with shared DataWriter.cpp.
    1  // TODO: set attrs["value"] for Constant - how to get the value?
    1  #define UsingComputationNodeMembers /*without OperationName; needed to support inconsistent pattern of InputValue--TODO: This comment it out of date. */ \
    1  // TODO: Currently supports only packing of streams with sequences of equal length.
    1  // TODO: Should go away in the future. Framing can be done on top of deserializers.
    1  // TODO: make bidirectional GRU work by figuring out output data
    1  /// TODO: Memory provider should reside on the matrix. It is responsibility of the network
    3  // TODO: Memory provider should be injected by SGD.
    1  // TODO: change to TensorView and AssignCopyOf() with reduction
    2  // TODO: change all three '512' to 'GridDim::maxThreadsPerBlock' (not doing this now since I cannot test it)
    1  unsigned long long m_gammatime; // TODO: what are these? Not even the context can be guessed from these names.
    1  unsigned flags; // flags that apply to this sequence --TODO: We really need to know at least what those flags are, if an enum is asking for too much.
    1  // TODO: Could probably be a memory mapped region.
    1  // numbers - pointer to vector to return the numbers --TODO: what the hell are those numbers?
    1  // TODO: drop call_once and co. and make cached devices a local static, once we're on VS2015.
    2  // TODO: make sure O_TRUNC is added.
    1  # TODO: sharing = false? I'd need that for speech feature extraction.
    1  // TODO: This is a special kind of tensor product, and calls for a tensor representation.
    1  // TODO: In the future, we should provide a NEON based implementation instead.
    1  else if (std::find(strType.begin(), strType.end(), L"SIMPLERNN") != strType.end()) // TODO: camelCase
    1  // TODO: Move this further down; or decide whether the 'nullptr' version is needed, other than ResetMBLayouts() which could use the global order and filter by itself.
    1  // TODO: change all id args to wide strings, then update the code.
    1  //TODO: The unit gain term (1-beta) should stay as it is (currentMomentum) instead of using the following scaled term.
    1  // TODO: This ^^ should go away once SGD gets fixed to take the truncation size as a parameter.
    1  // TODO: Assignment operator
    1  unsigned int firstframe : 16; // TODO: obsolete; once removed, we are back at 32 bits--yay
    1  // TODO: better return 0; it is then still valid to compute lossAverage * numSamples
    1  # - maps init_default_override_or_glorot_uniform to default --TODO: we should have a global setting for that
    1  // TODO: remove this
    1  // TODO: This is duplicated in BestGpu.cpp
    1  // TODO: replace this with TensorOps.h LogAdd(). It differs in using ElemType throughout, while this one seems to use 'double' versions of exp() and log().
    1  { // ^^ TODO: remove this
    1  // TODO: Should be removed, when legacy config goes away, expects configuration in a legacy mode.
    1  // TODO: Should be deprecated.
    1  m_parser.SetFilePosition(0); // TODO: can this ever be set to not 0?
    1  // TODO: How to do this right in case of arbitrary strides? Compute the new stride based on m_allocation or something? Is it even possible? Or do we need to guard?
    1  // TODO: these are return values as well, but really shouldn't anymore; only used in some older baseline code we some day may want to compare against
    1  // TODO: What??
    1  else if (std::find(strType.begin(), strType.end(), L"CLASSLSTM") != strType.end()) // TODO: camelCase
    1  // TODO: alternatively, print a warning and return false.
    1  // TODO: should we unify sample and sequence mode packers into a single one.
    1  // TODO: RegexReplace!
    1  // TODO: This ReshapeNode should no longer be used. Its function will be taken over by Transpose and the Reshape that follows this one below.
    1  // right after the brace, e.g. [- a - b] will separate using '-' instead of ';'. TODO: document what this is used for.
    1  // TODO: these two are always parallel, merge them together?
    1  // TODO: Move Constructor
    1  // TODO: if we only skip a limited number of bytes, fread() them
    1  // ... TODO: actually, is subtracting 1 the right thing to do here?
    1  size_t sLen; // TODO: say what these are
    1  en = en > numParallelSequences ? numParallelSequences : en; // TODO: why are these two tests necessary? We should rather test rank
    1  else if (EqualCI(randomizeString, "auto") || EqualCI(randomizeString, "true")) // TODO: "true" is inconsistent here, should be deprecated
    1  m_switchFrame[0] = m_mbSize + 8888; // TODO: WTF??
    2  // TODO: above push_back does not actually 'move' because the internal push_back does not accept that
    1  // TODO: According to Amit, the VS compiler is not able to vectorize into lambdas. Solution: change the lambda to take an N, or to implement the loop inside (with 1 element by default).
    1  /*TODO: merge with call site*/ void ForwardPropS(Matrix postprob, Matrix alpha, Matrix beta, Matrix& functionValues, const Matrix& lbls, const Matrix& pos_scores, const Matrix& pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
    1  for (auto iter = matrices.begin(); iter != matrices.end(); iter++) // TODO: range-based for
    1  // TODO: allow outputRank < 0 meaning to denote "all but", from right
    2  // TODO: also check the #frames here; requires a design change of the TOC format & a rerun
    1  // TODO: Verify that node->GetSampleLayout().GetNumElements() == 1. Require explicit summation to declare intent that this is a criterion.
    1  // TODO: There is still debate whether an InputValue without layout makes sense.
    1  # TODO: should we have a parameter to specify the arity of the input?
    1  # TODO: should both activations be replaced?
    1  // TODO: implement this; also as basis for overriding parameters from the cmd line
    1  // TODO: unify with the Find() function below
    1  // TODO: move these into GetMinibatchIntoNetwork() --but those are passed around; necessary? Can't we get them from 'net'?
    1  #define TWO_PI 6.283185307f // TODO: find the official standards-confirming definition of this and use it instead
    1  // TODO: Are these meant to be disjoint?
    1  // TODO: how to handle batch size not being one?
    1  // TODO: Move this error check there, since this is called only from one place.
    1  // TODO: The signedness of k (required for omp) causes an extra sign-extend.
    1  // TODO: This calls for a tensor representation!
    1  // TODO: Or should we just blast m_distanceToStart to GPU, and maks based on that? It is small compared to features.
    1  // TODO This currently reads a ComputationNode object from a property, thereby bypassing "normal" input handling.
    1  // TODO: change this later on
    1  lblInfo.m_classInfoLocal->SetValue(0); // TODO: needed? (left-over of refactoring)
    1  // TODO: After the change to shared_ptrs, this may no longer be necessary.
    1  // TODO: handle hasSequenceAxis cases
    1  // TODO: What is the expressionPath?
    1  // TODO: why is this value not used?
    1  // TODO: Should be async
    1  //TODO: because sparse setting value on non-zero sparse matrix involves
    1  # TODO: this is not concurrency safe. Another job could use a directory we are trying to remove ...
    1  DEVICEID_TYPE m_deviceId; // TODO: is this shared by all nodes?
    1  // TODO: support cases where batch size is not 1.
    5  // TODO:
    1  // TODO: WE SHOULD REMOVE THIS HACK ASAP.
    1  shared_ptr> m_zeroMatrix; // constant [1]-dimensional 0 used for backprop --TODO: could use a static map[deviceId]
    1  // TODO: This code is still based on the old behavior, so that all current tests pass.
    1  auto p = dynamic_cast*>((ComputationNodeBase*)vp); // TODO: check that all void* casts really come from ComputationNodeBasePtr; or add a method ToVoidPtr(). Or get rid of the void*?!
    1  //TODO: test if it's a string tensor
    1  # TODO: merge this. Test: Tests\EndToEndTests\CNTKv2Python\Examples\deconv_MNIST_test.py, Tests\EndToEndTests\Examples\Image\GettingStarted\07_Deconvolution
    1  // TODO: CNTK config added "traceLevel = 0" to 'config'. In BS, we cannot do that (IConfigRecord is immutable). Solution: Just say "traceLevel = 0" in the BS macros for readers.
    2  // TODO: Avoid copying the data when possible
    1  const std::wstring minibatchCountKey = L"minibatchCount"; // TODO: Python-style spelling
    1  // TODO: This is meant as a stop gap, the minibatch source should be properly drained instead.
    1  // TODO: This is a temporary debugging aid and should be removed after the functionality to late bind
    1  using namespace std; // TODO: ugh!
    1  # resolve tuples and NamedOutputs --TODO: check for duplicates
    1  // TODO: deserializers.
    4  // TODO: update allocated range --also enforce consecutiveness
    1  // TODO: extend this method to handle bidirection LSTMs.
    1  // TODO: This should be in the storage object.
    1  // TODO: We can removed these options once we can adjust learning rate at minibatches level
    1  #else // TODO: test this; e.g. does st_mtime have the desired resolution?
    1  // TODO: make these functions friends of NDViewArray and move to Utils?
    1  // TODO: more test is needed to cover general cases where batch and sequence axis are involved
    1  // TODO: next step: use PARTraversalFlowControlNode::AllocateGradientMatricesForInputs() and ReleaseMatricesAfterBackprop()...
    1  # TODO: it would be great if in a future version we could recognize and support Python 3.5 typing.Sequence
    1  // TODO: The next few do not belong into SGD any more than the network or reader we operate on. Either move network and reader in here, or move these out.
    1  // TODO: why not write a BOM?
    2  // TODO: How to deal with the specified 'computeDevice'
    1  if (vstr.size() < 3) // TODO: Document this special condition. Why should we not process empty sequences like ?
    1  // Call this at the end because this will resize Value(), but that requires the updated MBLayout. TODO: Clarify the sequence of events. Should we update the MBLayout in UpdateFunctionMBSize()?
    1  /*TODO: merge with call site*/ void BackpropToS(Matrix& gradientOfL1Norm,
    1  shared_ptr> pLearnableNode = node; // TODO: what's this for?
    1  for (const auto& node : GetEvalOrder(rootNode)) // TODO: verify that order does not matter here, then replace by GetAllNodesForRoot()
    1  // TODO: Use std::lower_bound
    1  // TODO: find a way to use CPUDEVICE without a huge include overhead; OK so far since CPUONLY mode is sorta special...
    1  // TODO use boost::algorithm::join, boost::adapters::transformed, make this a generic function
    1  // TODO: more details when tracing warnings
    1  /*TODO: merge with call site*/ void BackpropToS(Matrix inputGradientValues, const Matrix& gradientValues, const Matrix& inputFunctionValues, const Matrix& functionValues)
    1  // TODO: This is now ignored on input, so we can should change to an empty string. This might break parsing, and must be tested first
    1  - TODO: use a bool vector for the time dimensions --> Gather()
    1  assert(idx == (LabelIdType) NULLLABEL); // TODO: what other conditions?
    1  // TODO: we need a CopyGPUToCPUSync
    1  // loop through all the samples and create a one-hot representation, or multi-hot in some conditions (TODO: which condition)
    1  // Towards nested loops: --TODO: implement this
    1  // TODO: Reformat DISPATCH... macros to the following form:
    1  m_allocation = m_dims.empty() ? 1 : m_dims.back() * (size_t) m_strides.back(); // TODO: Or should an empty shape mean it's a scalar?
    1  &matrices.GetInputMatrix(iter->first)); // TODO: use a reference instead of a ptr
    1  // TODO: Instead of passing numAllSamples in here, we should determine it from the inputs in case of no layout. Or simply forbid this case.
    1  //By defualt, V1 uses UnitGain momentum. TODO: Do we need to enable V1 with non unit gain update?
    1  // TODO: This function can be further improved by loading the kernel in shared memory
    1  // TODO: This can be done more efficiently by using m_sequences[].
    1  // TODO: Add a special case for tensor bias reduction. cudnn is ~7% faster on Image/QuickE2E.
    1  // TODO: Allow creating inputs without a dynamic axis
    1  /// TODO:
    1  // TODO: Why not just reload it? Because SGD::Train() holds pointers to the parameters directly? That should be fixed.
    1  // TODO: Fix CNTKLibrary.h and CNTKLibraryInternals.h for CNTK_HEADERONLY_DEFINITIONS.
    1  // TODO: needs numerically stable implementation.
    2  // TODO: exclude updating progress writers from profiling?
    2  } // TODO: later use memset()
    1  // FIXME: The following is not picked up yet, which is why we have to tag it to
    1  # TODO: accept Python's typing.Sequence instead; then import layers.typing by default in layers.__init__.py
    1  // TODO: use if (!Is(node))...
    1  // TODO: Use this to implement ComputationNode::ConstOnes? Or do we even need that anymore?
    1  u_masked = u + (h_enc_valid - 1) * 50 # logzero-out the unused elements for the softmax denominator TODO: use a less arbitrary number than 50
    1  // TODO: Must be removed when SGD is moved to an untyped matrix.
    1  // TODO: just use a lambda
    1  const auto& featureNodes = FeatureNodes(); // TODO: a getter; should be called GetFeatureNodes()
    1  // TODO: TensorShape should have a method to
    1  # ^^ TODO: Complete the design for name-accessible .outputs, then bring this back.
    1  // TODO: Move more generic functions such as getting dims, resizing, and getting/setting as scalars in here.
    1  // TODO: Transpose flags for all matrices, inputs and outputs?
    1  # TODO: use varint encoding for sparse indices,
    1  // TODO: This is potentially an expensive operation. Need to do some logging.
    1  // TODO: Remove code dup with above function by creating a fake Writer object and then calling the other function.
    2  // TODO: we can store labels more efficiently now since we don't do frame-wise random access anymore.
    1  // TODO: Is this really always an error? Are there valid cases where one would over-specify possible input nodes, even if they are not used/needed?
    1  // TODO: Why Backward signature does not take Parameter instead of Variable for gradients?
    1  // TODO: Do we need to hold this pointer at all? We will only query it once. Or is it used to hold lock to a GPU?
    1  // TODO: multiview should be done on the level of randomizer/transformers - it is responsiblity of the
    1  if (_wfopen_s(&mFile, fileName, L"rt") != 0) // TODO: What does this warning do? Why not fail?
    3  // TODO: The composition of packer + randomizer + different deserializers in a generic manner is done in the CompositeDataReader.
    1  // labels - pointer to vector to return the labels --TODO: change to reference
    1  # TODO: Can bidirectionality be an option of this? bidirectional=True?
    1  if (firstIsDouble) // Double/x --> only allow 1/x, implement as Reciprocal --TODO: fix once we have ElementDivideBy
    2  // ... TODO: use the proper constants here (slightly inconsistent)
    2  // TODO: better move this special-casing into TensorView::AssignElementwiseProductOf()
    1  // TODO: separate into nodes that have inputs and those that duplicate functions with input adding except just not adding inputs. Clear?
    3  /// TODO: Document inferInputRankToMap
    1  // TODO expAvgFactor == 0 && blendFactor == 1 can be optimized (no need for update).
    1  // TODO: This will be changed, when we move transformers under the (no-) randomizer, should not deal with multithreading here.
    1  if (m_sampleLayout.GetRank() < 1 || ((m_sampleLayout.GetRank() > 2) && notFlattenableTo2D)) // note: scalars are not stored as tensors of rank 0, but rather as 1-dim vectors. TODO: clean this up some day
    1  #else // TODO: delete this
    1  // TODO: Get rid of this after-the-fact patch.
    2  // TODO: add -Node to the class name
    2  // TODO: are we treating gaps correctly here?
    1  // TODO: Should be injected by CNTK into the reader (will be a member of Matrix class).
    1  // TODO: Support inputs with > 1 dynamic axes
    1  // TODO: consider exposing this somehow so that it is easy to test by setting it to small value.
    1  // TODO: Below line assumes that the first output of the op (e.g. conv)
    1  // TODO: can we use 'true' here instead? Above transposition hack won't work with row slices. 'obs' not used elsewhere
    2  // TODO: Currently this is a no-op since the actual quantization is synchronous
    1  # TODO: bring this back once the design has been settled
    1  // TODO: should we also save momentum schedule into the checkpoint?
    1  // TODO: Check whether these functions always map 0 to 0.
    1  // TODO: share most code with MatrixL2RegNode
    1  // TODO: Should it have a read-only StorageType property?
    1  // TODO: Clean this up--do this only if numIds is 0 (no class or mapping read), otherwise require them to be identical or 0.
    2  // TODO: move this out of the reader.
    1  //TODO: Names of transforms and deserializers should be case insensitive.
    1  // TODO: if this is too sensitive, we can add a margin on the bases of percentage of
    1  const wstring& expressionName = nodeName; // TODO: think this through
    1  // TODO: We use the old CNTK config reader for this. With BrainScript, we would have to parse the file locally here, which should be easy.
    1  // TODO: Change all of these throughout the codebase to 'class enum'. Also change all places where we still use integer constants.
    1  // TODO: implement peephole
    1  // TODO: Currently only default dynamic axis is supported
    1  // TODO: What is this? Debug code?
    1  // TODO: We also know that indexData and sourceData must have the same MBLayout. But that is checked at runtime.
    1  if (idmap.empty()) // TODO: delete this: && !modelsymmap.empty()/*no mapping; used in conversion*/)
    1  // string name = configFeatures.Name(); // TODO: Aaargh!!!
    1  const auto& outputs = dynamic_cast&>(outputsb); // TODO: a NULL check, to be sure
    1  // TODO: rename these to InPlaceFloor() and -Ceil() (I never know what it means to truncate a bottom)
    1  // TODO: Currently this is a workaround to free static MPIWrapper, it will go away soon.
    2  // TODO: This type conflicts with std::vector --we should rename it
    1  // TODO: make ONNX MeanVarianceNormalization and CNTK test work with sequential models.
    2  // TODO: We should use memset(), but that only works if there are no extra rows (in a patch). Do we even allow non-stripe patches? I don't remember... CUDA lib does.
    1  // note on exprPath: since - has only one argument, we do not include it in the expressionPath --TODO: comment correct?
    1  void fillOneUttDataforParallelmode(StreamMinibatchInputs& matrices, size_t startFr, size_t framenum, size_t channelIndex, size_t sourceChannelIndex); // TODO: PascalCase()
    1  // TODO: Implement CSR as a transposition of b, like we do for GPU.
    1  // TODO: Implement optimized diagonal functions for sparse matrices. For now use the DiagonalToDense instead.
    1  // TODO: Why the ^^ namespace?
    1  const size_t tbegin = max((size_t) t, randomizationrange / 2) - randomizationrange / 2; // range of window --TODO: use bounds() function above
    1  // TODO: As the next step the packers will be moved out of the readers into the
    1  // TODO: specific to LSTM. icfo (CNTK) to iofc(ONNX)
    1  // TODO: It seems that this is also applied to other tyoes of nodes, so the name of this function is wrong.
    1  { // TODO: rename this
    1  // TODO: Move this to class File, as this is similar in nature to LoadMatrixFromTextFile().
    1  // TODO: do not specify 'Truncated' but 'TruncatedLength', set m_truncated so given, and let m_mbSize control how many #parallel sequences the reader is allowed to pack into an MB.
    3  // TODO: This class should go away eventually.
    1  // TODO: We currently only support one backprop root
    1  // TODO: Rewrite this constructor to eliminate the external buffers flag. Make a separate construction mechanism for Matrix objects that don't own their storage.
    2  // ... TODO: Currently, 'cores' does not limit the number of threads in parallel_for() (not so critical, fix later or never)
    1  // TODO: if function pointer or lambda could support template, switch to use them.
    1  // TODO: which learning rate schedule should take precedence here?
    1  // TODO: Move assignment operator
    1  // TODO: This is currently not immutable because it gets patched w.r.t. recurrent loops. Ideally we don't patch. Need to review and verify that it is sufficient.
    1  // TODO: method names should be CamelCase
    1  WriteFormattingOptions() : // TODO: replace by initializers?
    1  // TODO: all logging functionality should be refactored to live in a logging utility class.
    1  // TODO: do we need to destroy ourselves as well?
    1  // TODO: Should have proper loggin instead.
    1  // TODO: move this to the MBLayout where this can be done together with the creation of the other mask and is likely to further improve performance.
    2  # TODO: Should we cater to the special case of 1D convolution for text? I.e. sequential only (filter_shape=()).
    1  // TODO: do we want to allow accept non-empty strings and non-0 numerical values as 'true'?
    1  static shared_ptr reductionBuffersCache[32]; // cache of objects --TODO: Do we have a #define the max somewhere? Then also use it in CPUMatrix.cu GetOnesTensor()
    1  // TODO: Should we use MPI_Bcast instead for better performance
    1  ch = trim(ch); // TODO: operates in-place, so no need to assign back
    1  # TODO: having to pass the dynamic axis is suboptimal. Any better way?
    1  // TODO: encapsulate it into a destructor? Note: Cannot throw exceptions in destructor.
    1  // TODO: FPGA
    1  // TODO: Change this interface when SGD is changed.
    1  //TODO: replace the struct option with dictOptions
    1  for (size_t i = 0; i < N; i++) // TODO: do we need to test output tensor here as well?
    1  ConfigArray layerTypes = config("layerTypes", L"Sigmoid"); // TODO: camelCase
    1  // TODO: uncomment this code once bidirectional LSTM is supprted.
    2  // ... TODO: ensure iterators do not return OOVs w.r.t. user symbol table
    1  /* TODO:
    1  // TODO: Rename to CustomCriterionNode?
    1  if (fr.GetIterationDimension() != m_shiftDimParam) // TODO: this was removed; GetIterationDimension() is always -1 now
    1  let val = dynamic_cast(value.get()); // TODO: factor out this expression
    1  # TODO: If this is of general interest, consider to move it to progress_print.py
    1  typedef std::string LabelType; // TODO: are these supposed to be the same as the DataReader's?
    1  // TODO: Make this a trace option, e.g. enabled by the ComputeEnvironment.
    1  // TODO: Reconcile this with the ComputationNode::Validate functionality in core CNTK to avoid duplication of inference logic
    1  // TODO: this ^^ does not seem to belong here.
    1  float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1"); // TODO: parameter should be called 'defaultHiddenActivation'
    2  // convert it --TODO: once we permanently use the new format, do this in fread() for V1
    1  } // pass models in (to GPU) // TODO: rethink the naming of this function
    1  // TODO: move into a separate header/class, to decouple from this class which would then be only used by old NDL and SimpleNetworkBuilder.
    1  // TODO: this should have been renamed to CopyMBLayoutTo(), but it had the wrong signature??
    1  // TODO: replace with an efficient version, e.g. IPG, block multiplier, Eigen, gemmlowp, etc.
    1  // TODO: BUGBUG: if not starting from checkpoint, need to synchronize initial model
    1  // - the cached m_evalOrders[root], reordered to make nodes belonging to the same loop consecutive. TODO: Try not to do that.
    1  // TODO: why not say *this = ColumnSlice()?
    1  // TODO: This was only printed if >1 eval criterion. Why? Needed?
    1  // TODO: Is there a better way to discriminate?
    1  // TODO:@Amit Due to current limitation of the network builder, we can't bypass the memory copy operation at this step.
    1  // TODO: We should use the separator that matches the include path.
    1  // TODO: or does that only signal an issue, and we should still terminate ourselves?
    1  m_gradHeader->numEvalNode = evaluationNodes.size(); // TODO: rename numEvalNode (plural)
    1  // TODO: Move this class to Basics.h when it is required by more than one reader.
    1  // TODO: Resize temp matrices here (not doing so does not really fail since for full matrices, class Matrix will resize by itself)
    1  // TODO: a future version may want to enforce retaining the content, to allow dynamically growing layouts column by column (when size is not known upfront)
    1  // TODO: use tensor lib, then this will be easy, no memsharing needed
    1  // TODO: should m_learningRateMultiplier be set to 0? Or should every node have a way to add its own say on the learning rate for all its inputs?
    1  // TODO: move the two-forward-pass support out of the reader, make a first-class citizen.
    1  # TODO: pull this apart so that we can compute the encoder window only once and apply it to multiple decoders
    2  // (helper for qsort() in printmatvaluedistributionf() below --TODO: use a lambda?)
    1  // TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
    1  // ... TODO: we don't need this to be a class member, actually; try to just make it a 'static' function.
    1  // TODO: The return value if this is never used except in loops where we do an &=. It is not clear whether that is a bug or intentionally prevents DataEnd() from being called.
    1  /// TODO: do we need to handle special dimension values
    1  # TODO: Would be cool, if the user could pass a dictionary with overrides. But maybe for a later version.
    1  if (p == nullptr) // TODO: can we make this look the same as TypeExpected in BrainScriptEvaluator.cpp? We'd need the type name
    2  // TODO: Switching to Boost would eliminate this problem.
    1  // TODO: use our File class, so that we get the benefit of popen()
    1  m_gradientCheckSigDigit = configSGD(L"sigFigs", 6.0); // TODO: why is this a double?
    1  # TODO should handle swig path specified via build_ext --swig-path
    2  // TODO: This function is duplicate of the one in HTLMLFReader.
    1  // TODO: do this on PARTraversalFlowControlNode
    1  #include // for isnan() and numeric_limits --TODO: is that the right header?
    1  // TODO: Currently, we have to preserve the same behavior for randomization in order to make all tests pass.
    2  # TODO: allow user to specify learner through config file.
    1  childrenInThisLoop, childrenInOuterLoop; // TODO: think through what these mean when coming from PAR mode
    1  // TODO: Allowing partial matches seems misguided. We should discourage that, or just remove it.
    1  short t = *(short*)&v & 0x7FFF; //TODO: Check this!
    1  # TODO: make this work for multiple output case.
    1  // TODO: change below accesses to TensorView, then this is no longer needed. This is now the case, but need to test it.
    1  // TODO: drop "verbosity", use config.traceLevel() instead.
    1  size_t GetNumParallelSequencesForFixingBPTTMode() override { return mToProcess.size(); } // TODO: or get it from MBLayout? Can this ever be called before GetMinibatch()?
    1  { // ^^ TODO: remove this
    2  // TODO: Overload to enable compilation for DoublePrecision though its currently unsupported
    1  // TODO: separate this out from BrainScript to an interface that still does type casts--possible?
    1  // TODO: Turn rank into a member variable, and call this method once in validation (currently called for every single ForwardProp/BackpropTo()).
    1  // TODO: process "outputNodes" etc. arrays: Sync to node Tags, and make them all roots.
    1  # TODO: if initial_state is a CNTK Function rather than an initializer, then require to pass it multiple times; otherwise broadcast to all
    1  public /*protected*/ ComputationNetworkOwnedNodeState, // TODO: figure the 'protected' business out, somehow the 'friend' thing does not work
    1  } // TODO: correct for size_t?
    1  // time so rpath will apply (Linux). // TODO find a better way
    1  // TODO: Map dynamicAxis from name to node at this point, where that node is memoized inside NDL.
    1  DeleteNode(oldNodeName); // TODO: can this just be RemoveNodeFromNet()?
    1  // TODO: The following calculation relies on the ill-devised definition of "minibatch" of the current truncated BPTT implementation. Adapt this once fixed.
    1  // TODO: find a simple topological order and allocateEvalMatrices on that order directly
    1  // TODO: Make sure that the loaded model is the same as the trainer's model through UID matching in the V2 format
    1  extra_link_args = [] # TODO: LINKER_DEBUG_ARG is not passed in to avoid compilation error
    1  // TODO: Move to CommonMatrix.h
    1  // TODO: move this to that function as well--just tired to pass everything as arguments
    1  // TODO: try and remove support for this in the future, change the condition below to
    1  # TODO: consider potential name clash; users might want to call their functions the same.
    1  // TODO: Clarify how a single thread restriction can be lifted.
    1  # TODO: allow to say sequential=False, axis=2, length=100, ... something like this
    1  //TODO: additional options are not deserialized. This was not done when AdditionalOption was introduced.
    1  // TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
    1  // TODO: do we want to support non-primary binary deserializers?
    1  // TODO: is this header appropriate?
    1  // TODO: either check when creating this whether this assumption is true, or control this through a flag in here.
    1  typedef size_t cuda_size_t; // TODO: verify if this is consistent across CPU/CUDA, or use uint32 or so
    1  // TODO: Check whether we should use node output arg name for the check below.
    1  //TODO for very far future: Handle reduction on (multiple) batches all in once: batchAxesToReduce
    1  // TODO: move the two-forward-pass support out of the reader.
objConfig.Insert("traceLevel", config(L"traceLevel", "0")); // TODO: fix this by adding it to all config blocks. Easy to fix in BS as 'config with [ traceLevel = 0 ]'.  [1]
// TODO: This message is written to stderr before stderr gets redirected to the specified file. Fix this.  [1]
// TODO: just use a vector attach  [1]
// TODO: ignore if number of precision digits > FLT_[MANT_]DIG/DBL_[MANT_]DIG  [1]
Matrix& AssignVectorNorm2Of(Matrix& a, const bool isColWise); // TODO: arg should be const  [1]
// TODO: this incompatibility needs further investigation.  [1]
// TODO: we should do this in a constructor.  [1]
// TODO: This really should not be in cudalattice, since it is more general; we need a cudavector.cpp/h  [1]
// TODO: we may need to optimize this by a template arg  [1]
# TODO: must allow multiple variables, just like recurrence, as to allow beam decoding (permutation matrix)  [1]
//TODO: additional options are not serialized. This was not done when AdditionalOption was introduced.  [1]
// It is possible to specify labelType = "none" for either. --TODO: I only tested doing so for the first.  [1]
// TODO: move these to ComputationNetworkBuilder.cpp  [1]
// TODO: once this gets reimplemented using TensorView, then this is no longer needed.  [1]
# TODO: this should be a private class; use StreamDef instead  [1]
# TODO sparse for numpy arrays  [1]
/// TODO: Specify the constraints on the shapes of the operands.  [5]
// TODO: this does not work if mean/var inputs are not constant/parameters.  [1]
// TODO: Possibly set m_valid to false, but currently preserving the old behavior.  [2]
ElemType v = 0; // TODO: do this in 'double'?  [1]
// TODO: share most code with MatrixL1RegNode  [1]
// TODO: refactore commom code for float and double  [1]
else randomize = readerConfig(L"randomize"); // TODO: could this not just be randomizeString?  [1]
// TODO: why are these not static? And why are they here?  [1]
// reconstruct old lattice format from this --TODO: remove once we change to new data representation  [1]
// TODO: Move this up to where it is used (in a separate commit since git cannot track moving and changing at the same time).  [1]
// TODO: make type casts members of the SparseSequenceData  [1]
size_t sequenceDim = shape.size() - 2; // (only valid if pMBLayout) --TODO: In case of multiple time dims, this must be adjusted.  [1]
// TODO: Should be unified with StreamDescription from the new reader API  [1]
takesBool = true; InvalidInfixOpTypes(e); // TODO: implement this, needs a C++ node  [1]
// TODO: Move the shared core functions to the front of this source file.  [1]
if ((!IsLeaf() || Is>()) && !RequiresPreCompute()) // TODO: guard this through overrides instead  [1]
// TODO: We actually just need a boolean matrix for this.  [1]
// TODO: when gcc -v is 4.9 or greater, this should be: std::regex_replace(rootpath, L"\\/+$", wstring());  [1]
m_temp->AssignElementProductOf(*m_temp, InputRef(2).ValueFor(fr)); // TODO: is Input(2) minibatch data? Confirm  [1]
// TODO this assumes training is picked up with nodes with zero parameters  [1]
// TODO: Not safe from the ABI perspective. Will be uglified to make the interface ABI.  [3]
if (mFile) // TODO: Can this function be called multiple times? Then say so at the top function  [1]
// TODO: Should we use MPI_Bcast instead for better performance  [1]
DecideAndMoveToRightDevice(*this, idx, a); // TODO: only move target if beta != 0  [2]
// TODO: no scope here? ^^ Where does the scope come in? Maybe not needed since all values are already resolved? Document this!  [1]
// TODO: use f16c instructions if available  [1]
// TODO: currently assume that layout is the same between different streams, this will change.  [1]
// TODO: add more randomization types, and use a more meaningful scaling  [1]
// TODO: Should be removed. Currently a lot of end to end tests still use this one.  [1]
// ... TODO: change this to key() or something like this  [2]
// TODO: Think through what tags mean. Do we allow user-named tags? Is it a set or a single string? If set, then how to compare?  [1]
// FIXME use not yet existing NDShape function that returns the dimensions at once  [1]
// TODO: Change this to use an interface that is independent of .  [1]
// TODO: We should either invalidate and readapt the network if the backpropRoots change compared to what was specified when the network  [1]
size_t j = subsetsizes[subsetnum]; // return what we have --TODO: we can remove the above full computation again now  [1]
// TODO: This is a bit indirect. Can it be done more nicely?  [1]
// ... TODO: put a resize() here and all matmul, so we don't need to set size upfront  [2]
// TODO: do not transpose after RNN ops so we have one code path here.  [1]
size_t usedrangeend = rangeend - (Nbits > 1); // TODO: make this a parameter  [1]
// TODO: Should not wait, simply publishing event on the compute stream should be sufficient  [1]
m_dataReaders[m_ioNames[i]]->SetNumParallelSequences(nbr); // the first one determines the param of all others --TODO: This is flimsy.  [1]
// TODO consider to change to ChunkIdType where appropriate  [1]
version = cntk.__version__ # TODO consider shortening  [1]
// TODO: how to deal with boundary flags?  [1]
// TODO: double-check all these  [1]
labelId = found->second; // TODO: This function is called Check...() but it does Get something. Bad name?  [1]
#ifdef CNTK_UWP // UWP-TODO: find a replacement for PathRemoveFileSpec  [1]
// ^^ TODO: can we do away with this entirely and replace it by map.order()/this->order()  [2]
// TODO: The format specifier should probably be "%ls" here, but I'm not making that change as part of  [1]
#include "QuantizedMatrix.h" // TODO: strangely, this must be included first, although it is the first thing MatrixQuantizer.h includes. Without, nvcc fails.  [1]
// TODO: Exclude inputs not belonging to 'gradients' from the gradient computation  [1]
bool m_timestampFlag; // TODO: What does this do? TODO: camelCase  [1]
// TODO: Should be removed, when all readers go away, expects configuration in a legacy mode.  [1]
// TODO: Make this a runtime option.  [1]
// TODO: In sequence reader we probably don't need to store numbers in labels (we'll see)  [1]
// TODO: this only works in this specific case.  [1]
// TODO: This is not ideal. We will also need on-demand compilation, to allow any node to be used as an output after the fact.  [1]
// TODO: :: array. Check if that is the right operator for e.g. Haskell.  [1]
// TODO: This function is about determining the parallelization layout  [1]
Matrix mAlpha; // TODO: m_Alpha etc.  [1]
std::vector edges2; // TODO: rename these  [1]
// TODO: This will fail to compile under VS 2008--we need an #ifdef around this  [1]
vector errMsgs; // TODO: These are created but actually not returned, only their count is checked.  [1]
#ifndef NONUMLATTICEMMI // TODO:set NUM lattice to null so as to save memory  [1]
// TODO: Implement proper corpus descriptor.  [1]
// TODO: just use return!  [1]
// TODO: should we make this explicit configuration parameter  [1]
public: // (TODO: better encapsulation)  [1]
// TODO: Get rid of that const_cast, as soon as after Ryan's Matrix-lib refactoring separated out SetValue() from external vs. from deep copy  [1]
// TODO this handling needs to be graceful, but currently CNTK complains when we return empty sequences.  [1]
MaskMissingColumnsToZero(m_logSoftmax, InputRef(1).GetMBLayout(), fr); // TODO: is this the right way to neutralize gaps?  [1]
// TODO: verify that the set of matrix names is identical  [1]
if axis.name == 'defaultBatchAxis': # axis == Axis.default_batch_axis(): --TODO: how to do this right?  [1]
vector m_switchFrame; // TODO: something like the position where a new sequence starts; still supported?  [1]
// TODO: This should not be a config option, but rather inferred from dimensions of the Parameters.  [1]
// TODO: instead, remember the nodes directly, to be able to handle both float and double nodes; current version will crash for mixed networks  [1]
// TODO: fix this comment  [1]
// TODO: to be consistant with RNN and LSTM where Yhs is the only output.  [1]
// TODO: Randomization can be made simpler if we randomize only forwards.  [1]
// TODO: does this apply to anything else besides temporary node-internal intermediate results? What, for example?  [1]
// TODO: This is only used for testing whether a gradient has been allocated. Maybe reduce to bool HasGradient()?  [1]
// TODO: define an overload that takes const & for external users (which will then take a copy and pass it on to Apply &&)  [1]
// TODO: need to add support for other pooling types, for example,  [1]
# TODO: this is for internal purposes, so find better way  [1]
// TODO: can these be changed to ComputationNodeBasePtr?  [1]
#TODO: add additional options and learning context to the parameters of the updat_func so that the update function  [1]
# TODO: enable when it is exposed in c++  [2]
// TODO: 'direction' is really too general. signOfTimeOffset?  [1]
// overload binary operators between 'half' and build-in type. TODO: This should be handled in a better way  [1]
# TODO: this is not a convolution but a correlation, and W's shape has input and output depth reverted.  [1]
// TODO: get rid of dependency on ElemType  [1]
// TODO: Can this be called with a node that's already part of the network? This is currently allowed, but should it?  [1]
// TODO: The behavior can be simplified by only randomizing sequences forward.  [1]
// TODO: Check for IsPartOfLoop(). Also why not store the loop id in the node for direct lookup?  [1]
// TODO: two sets of functions, choose one  [1]
if (numframes != classids[j].size()) // TODO: remove this once we are confident  [2]
// TODO: Should be deprecated, use composite reader instead.  [1]
//TODO (backcompat): when loading a stale model we can still pass this test  [1]
double unkCount = 0; // TODO: why double?  [1]
} // TODO: clean this up  [1]
// TODO: use safe-save, i.e. write to temp name and rename at the end  [1]
// TODO: MBlayout and what is the perf hit for iterating/copying sequences.  [1]
struct ConfigurableRuntimeType // TODO: rename to ScriptableObjects::Factory or something like that  [1]
// TODO: implement Where op.  [1]
// TODO: fail on invalid  [1]
if (actualmbsize > m_mbSize * mToProcess.size()) // TODO: is this a LogicError?  [1]
// TODO: Should be removed, when all readers go away, expects configuration in a legacy mode.  [1]
# TODO: reenable this  [1]
// TODO: reference any additional headers you need in STDAFX.H  [1]
// TODO: Currently there is implementation of these in the V2 library, but it is not exposed and requires linking dependency.  [1]
using namespace std; // ugh! TODO: get rid of this from .h files!!!  [1]
// TODO: Unroll 4-times for better performance leveraging vectorization  [4]
// TODO: Currently borrowed from the old reader, should be refactored.  [1]
// TODO: functionally they are the same, the only difference is how we handle  [1]
wstring op; // operation, encoded as a string; 'symbol' for punctuation and keywords, otherwise used in constructors below ...TODO: use constexpr  [1]
// TODO: it is not clear how to limit this only to hash, but we do not use partial specialization in other places.  [1]
// TODO: move to -Base (or -Network?)  [1]
// TODO: Currently preserving this for backward compatibility with current configs.  [3]
ComputationNodeBasePtr m_sourceNode; // one of the nodes of the loop --TODO: What is the special meaning of this node? It seems to always be a delay node.  [1]
// TODO: we would need to add a sequenceMap type here as well  [1]
// ... TODO: use a constant to define the maximum KN count level,  [1]
// TODO: This must be configured in a generic fashion where tensor axes are chosen along which parameters are tied.  [1]
// pMBLayout->Set(0, m_switchFrame[0] - 1, MinibatchPackingFlags::SequenceEnd); // TODO: can't we use Set()?  [1]
double mbEvalCriPos = criterionNodes[npos2]->Get00Element(); // TODO: make Get00Element() a function of ComputationNodeBase  [1]
/*TODO: merge with call site*/ void BackpropToS(const size_t inputIndex, const Matrix& invNorm0, const Matrix& invNorm1, const Matrix& functionValues,  [1]
// TODO: This could actually be strided?  [3]
// TODO: move the methods in this region under their respective headline  [1]
// TODO: Avoid copying.  [1]
// TODO: somehow the constructor overload from Thunk function fails to compile, so for now use MakeThunk instead  [1]
// TODO: we should redefine this to write UTF-16 (which matters on GCC which defines wchar_t as 32 bit)  [1]
else if (deviceIdB == deviceIdC && deviceIdB != CPUDEVICE) // TODO: why not the other two combinations?  [1]
// TODO: codecvt should be supported in the latest gcc,  [1]
- TODO:  [1]
// TODO: We should allow to reduce to a 0-length tensor if the dimension is 0  [1]
//TODO: avoid data copy.  [2]
m_devicesTransferedTo[0] = other.m_devicesTransferedTo[0]; // TODO: spelling  [1]
// TODO: Do we really need these? Folks who want to use C++ can instead say net->AddNodeToNet(New<>(...)), which is not that different.  [1]
// TODO: possibly change to class File, we should be able to read data from pipelines.E.g.  [1]
// TODO: We should consider splitting data load from the description in the future versions.  [1]
/// TODO: We need to have native support for DictionaryValue and DictionaryValue.  [1]
// TODO: Should be removed at some point.  [1]
// TODO: CNTKLibrary.h should be cleaned up to allow header only dependencies.  [1]
// TODO: We can use this interface in more places.  [1]
// TODO: Does the same trick work for 2D images?  [1]
// TODO: timestampFlag or timestampingFlag? (Or timeStampFlag?)  [1]
// TODO: add keepExistingValues (default to true) argument so that the existing values are kept even after reallocation  [2]
Matrix& data = matrices.GetInputMatrix(iter2->first); // can be features or labels (TODO: Really? Didn't we just ^^^ check that it is 'real'?)  [1]
// class fixed_vector - non-resizable vector --TODO: just use std::vector  [1]
m_labelsIdBuffer = new /*typename IDataReader::*/LabelIdType[mbSize](); // TODO: no "new" please! Use a vector  [1]
//TODO: Do we assume there is only one batch axis in the whole system?  [1]
// TODO: What are our sorted-ness guarantees?  [1]
memcpy(GetBlockIds(), v.GetBlockIds(), v.GetBlockSize() * sizeof(size_t)); // TODO: change block id from size_t to CPUSPARSE_INDEX_TYPE, and rename BlockSize to BlockCount  [1]
// TODO: ^^ it seems by saving the name in the ConfigValuePtr itself, we don't gain anything; maybe remove again in the future  [1]
// TODO: This function seems to be never called. Remove it if that is the case.  [1]
// TODO: there is some duplication of type checking; can we unify that?  [1]
// TODO: Replace with std::exclusive_scan when we switch to C++17  [1]
/*TODO: merge with call site*/ void ForwardPropS(Matrix& functionValues, const Matrix& unnormedPrior, const Matrix& mean, Matrix& logstddev,  [1]
if axis.name == 'defaultDynamicAxis' or axis.name == 'UnknownAxes': # TODO: how to do this right?  [1]
// TODO: document namedArgs--does it have a parent scope? Or is it just a dictionary? Should we just use a shared_ptr> instead for clarity?  [1]
# TODO: need to merge with above. Can it simply be transpose=True?  [1]
// TODO make this nicer  [3]
// TODO: Is this logic of transitively constructing the output shape from the operands correct?  [1]
// TODO: The shape of the specified output Value object must match the actual output shape  [1]
m_temp->AssignDifferenceOf(InputRef(0).ValueFor(fr), *m_classZeroLabels); // TODO: need a slice for m_classZeroLabels?  [1]
std::map> outputStreams; // TODO: why does unique_ptr not work here? Complains about non-existent default_delete()  [1]
foreach_index (k, randomizedchunks[0]) // TODO: this really cries for iterating using iterators!  [2]
// TODO: Should we do this for all of the axes in kernelShape that have a dimensionailty of NDShape::InferredDimension?  [1]
// TODO: bool read_only = 8;  [1]
# TODO: should the rate(s) be default_options?  [1]
// TODO: do we need to get blockroot if it is a block function?  [1]
// TODO: Do we need to take care of the sequence axis here (like before)?  [1]
m_pMBLayout->Init(m_numSeqsPerMB, 0); // (SGD will ask before entering actual reading --TODO: This is hacky.)  [2]
Matrix softMax_t = m_softMax.ColumnSlice(sz, nbr_wrd); // TODO: declare these outside of the loop to avoid the malloc  [1]
/*TODO: merge with call site*/ void BackpropToLeft(const Matrix& logOfRight, Matrix inputGradientValues,  [1]
# TODO: Should clone-replacing inputs with placeholders reset the shapes to unknown?  [1]
UNUSED(c); // TODO: this function seems like a stub  [1]
// TODO: The class will be further refactored and common based will be extracted with BlockRandomizer.  [1]
# TODO: check whether Values can be ingested directly  [1]
// This is meant for the case where the entire string is a brace expression (TODO: is that true? [fseide]).  [1]
// TODO: This is similar but not identical to Common/DataWriter.cpp. Why is this DataWriter different? Can it be reconciled?  [2]
# TODO: add tests for this complex condition  [1]
// TODO: add a dictionary value constructor with an rvalue parameter.  [1]
// convenience accessors. TODO: use only one name. Rename the members themselves?  [1]
// TODO: should ignore , check the sentence ending is  [1]
// TODO: count VIRGINLOGZERO, print per frame  [1]
// TODO: this does not work completely.  [1]
// TODO: What should the data size be?  [1]
// TODO: move these out from this header into some more general place (I had to move them here because otherwise CNTKEval failed to compile)  [1]
// TODO: reference additional headers your program requires here  [8]
// TODO: ToBatchAxis also override batch size.  [1]
#define force_crash() (*((int *) -1) = 0) // TODO: this does not in fact seem to crash it...  [1]
// TODO: direct process sparse data without copy  [1]
// TODO: if the prefetch is still valid, print a warning here!  [1]
// TODO: make node map to vector of FunctionPtr  [1]
// TODO: comment what this does, in a function called UpdateDataVariables  [1]
// TODO: verify - ONNX specifies that ImageScaler always need a batch axis  [1]
#ifdef __INTEL_COMPILER // TODO: check this  [3]
// TODO: OperationName calls static TypeName which does not match the actual type names in that the 'Node' is missing.  [1]
# TODO: rename V2 API function as well from reduce_log_sum() to reduce_log_sum_exp()  [1]
// TODO: Abstract this.  [1]
// TODO: Currently BPTT does not support sparse format as output.  [1]
# TODO: add sequential mode like Convolution()  [1]
// TODO: enable sequence_lens. It requires additional model input of batched sequence data layout.  [1]
// TODO: how does the file distinguish float vs double nodes?  [1]
// TODO: Support changing the device across different invocations of the forward method on a Function instance  [1]
continue; // already in the list --TODO: use insert()  [1]
eof // TODO: what are true and false? Literals or identifiers?  [1]
// TODO: test parsing of i => j => i*j  [1]
// ... TODO: change all of basetypes classes/structs to use this  [1]
// TODO: Make topK a constructor parameter  [1]
// TODO: replace by std::optional, once it's fully supported by VS.  [1]
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error even for packed value objects?  [1]
// TODO: when ShiftNode lands, check this as well. Ideally just test whether ptr is a IRecurrentNode  [1]
// TODO: this is not efficient--we only use a block-diagonal-like structure, rest is empty (exploiting the fixed boundaries)  [1]
# TODO: better say right here what the requirement is!  [1]
// TODO: make bidirectional LSTM work by figuring out output data  [1]
// TODO: this can only be cached once --but there is no check whether a different model is passed  [1]
// TODO: cannot call ProcessOutputs because we want the final output to have the expected ArgNode name  [1]
// TODO: lots of code dup with the other Prepare function  [1]
// TODO: We unpack the same output gradients each time this method is called for a different input.  [1]
// TODO: add -Node to the class names  [1]
// TODO: This is in a somewhat partial state in that we now have a global eval order (keyed by a nullptr), but don't use it yet.  [1]
// User is allowed to manipulate the frames... for now--TODO: move silence filtering here as well  [2]
m_defaultHiddenActivity = config("defaultHiddenActivity", "0.1"); // TODO: spelling, should be -Activation  [1]
CUDA_CALL(cudaEventCreate(&done)); // TODO: why not condition on do_sync, so that we can use SyncGuard?  [1]
// TODO: Constructor to move a specified NDArrayView value  [2]
// TODO: We should return 0 here.  [1]
// TODO: Should we use MPI_Bcast instead for better performance  [1]
// TODO: Call this ShallowClone instead?  [1]
InputRef(0).ValueAsMatrix().AssignDiagonalValuesTo(ValueAsMatrix()); // TODO: use tensor lib; this is a stride operation  [1]
// TODO: Would it be useful to allow one of the two to be a single column?  [1]
if (std::find(strType.begin(), strType.end(), L"SIMPLENET") != strType.end()) // TODO: camelCase  [1]
// TODO: Move NonCopyable as a separate class to Basics.h  [1]
newNode->SetInput(i, oldNode->GetInputs()[i]); // TODO: use AttachInput()?  [1]
// return; // actually a failure --TODO: This should not be necessary. Why is it?  [1]
return (long) linecnt; // TODO: change to size_t  [1]
// TODO: currently this code moved from the old block randomizer.  [1]
// TODO: use ONNX Expand once ONNX version 7 is supported  [1]
// TODO: OMP adds LOTS of overhead. Do we need a guard, a min size when to use it?  [1]
us(i,0) += other(i,t) * weight; // TODO: SSE version (very easy)  [1]
// TODO: Deprecate like PerDimMeanVarNormalizationNode as soon as we have a test case. Or just delete it.  [1]
size_t seqIndex; // parallel-sequence index; SIZE_MAX = all sequences in MB (most common case) --TODO: Bad name, 'sequence' and 'parallel sequence' are two different things  [1]
reduction_rank=1, # (0 means input has no depth dimension, e.g. audio signal or B&W image) --TODO: call it item_rank?  [1]
// TODO: This should be a runtime check, not an assert() that only runs in Debug.  [1]
// TODO: If the condition is a scalar constant, we can just pass-through the appropriate operand  [1]
// TODO: allow a tensor descriptor. Or allow 0 (inference). Maybe already supported--check this.  [1]
// TODO: This should be moved to the packers when deserializers work in sequence mode only.  [1]
// TODO: We need to decide what reshaping means in presence of a tensor.  [1]
// TODO: get rid of this source file once static initializers in methods are thread-safe (VS 2015)  [1]
exprPath; // TODO: create a composite dictionary  [1]
// TODO: this becomes a hard lookup with failure  [1]
// TODO: Why do we need these typecasts? (without it will fail with "cannot access private member declared in class 'Microsoft::MSR::CNTK::CPUMatrix'")  [1]
// TODO: Can we remove this, and have users use SetValue() instead? To avoid this potential error?  [1]
//// TODO: uncomment this code once LotusRT output shape matches ONNX  [1]
backptroffsets[L.edges.size()] = backptrbufsize; // (TODO: remove if not actually needed)  [1]
{ // TODO: alignresult will change to (start,end)  [1]
// TODO: these are scalar operations--why are they in Matrix?  [1]
// TODO: Check for error code and throw if !std::uncaught_exception()  [3]
if (mFile) // TODO: can this ever be called without an open file?  [1]
// TODO: set key as BatchNormalization instead of BatchNormalizationCaffe  [1]
#return init # TODO: change to this once this works, e.g. for layers.BatchNormalization()  [1]
ConfigArray str_rnnType = config("rnnType", L"SIMPLENET"); // TODO: camelCase  [1]
// TODO: callers of this often do ComputationNetwork::BumpEvalTimeStamp(featureNodes) and also for labels; we should eliminate the need for this.  [1]
m_threadsPerBlock = N; // don't launch more than necessary --TODO: Does this make a difference at all?  [1]
let memberIds = record->GetMemberIds(); // TODO: test this after change to ids  [1]
// TODO: In the future we should validate not on the flat list but the PARTraversalFlowControlNode structure. Then this will be unnecessary.  [1]
Matrix* currParamsGradient = &(node->Gradient()); // TODO: we can use shared_ptrs now  [1]
#include // for LoadMatrixFromTextFile() --TODO: change to using this File class  [1]
// NV_TODO: investigate cub support for half  [1]
// TODO: is there a thing as nested loop?  [1]
// TODO: factor these lines into a separate function  [1]
// TODO: For V1 format models make sure that the loaded model is isomorphic to the trainer's model  [1]
//TODO: enable reference mb size for each rate  [1]
// TODO: Should be moved outside of the reader.  [1]
for (auto& child : children) // TODO: do we need a check that this is stable if isFinalValidationPass?  [1]
// special accessor for sequence training --TODO: must be replaced by a different mechanism  [1]
// TODO: do that inside the loop to avoid copying, but one thing at a time  [2]
#pragma warning(disable : 4996) // ^^ this does not seem to work--TODO: make it work  [2]
// TODO: we will later have code that adds this path if needed  [1]
// TODO: Then we can also share the MBLayout; which currently is copied by value.  [1]
// TODO: Can it ever be not on the CPU? We allocate it ourselves abovew  [1]
// TODO: this shall be handled internal to UniqueNodeNameStorage  [1]
srand(++m_randomSeed); // TODO: older code did not have that; so no idea what random seed was used  [1]
class signallingevent // TODO: should this go into basetypes.h?  [1]
// TODO: This is an unusual use of this operator. Remove this.  [2]
// TODO: Changing the TensorShape does not seem to belong here.  [1]
sprintf(str, "learningRateMultiplier=%f NeedsGradient=%s", m_learningRateMultiplier, m_learningRateMultiplier>0 ? "true" : "false"); // TODO: update NDL to accept a better matching name as well  [1]
flowControlNode.m_nestedNodes = c.Nodes(); // TODO: make these two part of the constructor  [1]
if (m_numRows * numCols > 0) // TODO: remove if unnecessary  [1]
// TODO: merge with training criteria  [1]
// TODO : crop_automatic  [1]
// TODO: Is it at all meaningful to allow no features section?  [1]
// TODO: Why is this produced by the reader, and not just realized through the use of delay nodes in the network?  [1]
# TODO: make sure the xD versions have all the needed parameters  [1]
// TODO: where does the current scope come in? Aren't we looking up in namedArgs directly?  [1]
# TODO: does this belong into .sequence?  [1]
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation  [1]
// TODO: there should be a map from output nodes to inputs, so that this operation doesn't take square time  [1]
#include "Windows.h" // for CRITICAL_SECTION and Unicode conversion functions --TODO: is there a portable alternative?  [1]
// TODO: All forms that require specified dimensions but contain zeroes (to be updated by graph)  [1]
for (const auto& groupNode : nodeGroup) // TODO: is there an STL algorithm?  [1]
enum class TrainingCriterion : int // TODO: camel-case these  [1]
// // TODO: batchSize is fixed to one. Needs to find out how to handle bacth axis as a free dimension.  [1]
// TODO: change to STL containers  [1]
// TODO: be more consistent--we should clear out edges[] at this point!  [1]
refNet->GetMBLayoutPtrOfNetwork()->CopyFrom(net->GetMBLayoutPtrOfNetwork()); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently)  [1]
// TODO: In V1 graph generation, ReconcileDynamicAxis() should be treated like a no-op if the axis is known to be the same.  [1]
LogicError("TODO: implement phoneBoundaries setting in Binary MLF deserializer.");  [1]
// TODO: is this ^^ actually still used anywhere?  [1]
// TODO JC Refactor eligible methods and members into abstract base class.  [2]
// TODO: avoid hardcoded values  [1]
# TODO: reconsider the name. Windowed()?  [1]
strtmp = trim(strtmp); // TODO: operates in-place, so no need to re-assign to itself  [1]
# TODO: Test whether this is needed. We should instead just take whatever reduction dimension is given here as that of the input.  [2]
// TODO: move the two-forward-pass support out of the reader.  [1]
// TODO: profile, probably need to have some form of  [2]
// TODO: this copying here is redundant, value should be moved from the dictionary to the variable.  [1]
// TODO: Are all these meant to read out a scalar? Then rename and verify dimensions.  [1]
// TODO: support bias in CNTK op.  [1]
// TODO: Comment why we need a second ElemType.  [1]
// TODO: device.Type should be called Kind.  [1]
// TODO: These rvalue references are no longer adding value, change to const<>&  [1]
// TODO: This should be read in one place, potentially given by SGD.  [1]
Matrix& nbs = *matrices[L"numberobs"]; // TODO: what is this? We fall back to a different node?  [1]
// TODO: Why do we have two read functions? Is one not a superset of the other?  [1]
// TODO: the following two lines are a workaround for a bug in the Math library  [1]
// TODO: Implement optimized diagonal functions for sparse matrices. For now copy to dense first.  [1]
// TODO: Implement this with TensorView::DoElementwiseProductOf() and stride magic  [1]
# TODO: add a hash of the chunk  [1]
// TODO: what are these?  [1]
// TODO: Move augmentation to the separate class outside of deserializer.  [1]
// TODO: Is this ever called with anything other than 0?  [1]
else if (arg.Is()) // TODO: should have its own ToString() method  [1]
// TODO: Redo this leveraging that we now have shared_ptrs. It is probably even OK if both networks share feature nodes.  [1]
// TODO: m_sliceViewOffset has a different meaning in sparse (column offset) versus dense (byte offset to start of pointer). This should perhaps be fixed.  [1]
const auto &transcripts = labels.allwordtranscripts(); // (TODO: we could just pass the transcripts map--does not really matter)  [2]
// TODO: verify that all these operations on m_result really can do in-place (or use different methods instead)  [1]
// TODO not nice, but don't know how to access these frames otherwise  [2]
void Resize(const Matrix& other) // TODO: Should this carry over numNZElemToReserve for sparse matrices?  [1]
// TODO: Waiting Skype smart reply with attention model before enabling the functionality of tracking sequence dimension.  [1]
// TODO: change the data member names back to m_ syntax, or get team consensus to not do that  [1]
const size_t jRand = jSample; // TODO: This seems unfinished.  [1]
// TODO: use a reference for m_labelInfo[index]  [1]
// TODO: change this back to COMPLETED, double underscores don't look good in output  [1]
// TODO: maybe change to type id of an actual thing we pass in  [1]
# TODO: ^^ should no longer be needed; delete once confirmed  [1]
# TODO: allow float64 dtype.  [1]
virtual NodeStatePtr ExportState() // TODO: can we instead pass the shared_ptr object in? So we don't need to create a new one all the time? Or should we still take ownership of the ptr?  [1]
// TODO: This will in the future be able to hold sub-ranges for nested loops as well.  [1]
// TODO: Should switch to boost, boost::iequal should be used instead.  [1]
// main TODO items:  [1]
// TODO: Why is this allowed? Why not terminate?  [1]
// TODO: customize this function for all nodes that uses temp internal matrices.  [1]
// TODO: This can be done more efficiently, we know the range of chunks already.  [1]
# h(t) = (1 - i(t) .* h'(t)) + i(t) .* h(t-1) --TODO: need to confirm bracketing with NVIDIA  [1]
// TODO: There are several functions below that perform an in-place operation  [1]
char ch2[MAXSTRING]; // TODO: This is 0.5 MB right here on the stack. Really?  [1]
// TODO: to figure out sparse matrix size  [6]
// TODO: ^^ should this be void CopyMBLayoutTo(MBLayoutPtr pMBLayout);  [1]
// - initValue=array or nested array --> initialize from this value, infer dimensions --TODO: not implemented yet  [1]
// TODO: Should be removed when BPTT follows proper minibatch size.  [1]
// TODO: Where should the MBLayout be created--in BeginForwardProp() or ForwardProp()?  [1]
// TODO: should be deprecated, SetConfiguration should be used instead.  [1]
// TODO: implement m_colStride  [1]
// TODO: this is for sink or source - what type of variable for it?  [1]
// TODO: verify that srcTensors has consistant shapes  [1]
# TODO:  [1]
// TODO: the following is not templated--do it if needed; also should return a const reference then  [2]
scale = Parameter(_INFERRED, init=initial_scale, name='scale') # TODO: if this gets usage then offer a Softplus version like Stabilizer() for stability?  [1]
// TODO: This is super ugly. The internals of the storage object should be a shared_ptr.  [1]
// TODO: Should be deprecated. Composite reader should be used instead.  [1]
# TODO dynamic axis for numpy arrays  [1]
auto argVal = move(args[i]); // value of the parameter --TODO: Is this ever unresolved?  [1]
// TODO: This distinction should not be necessary anymore. Calling GetEvalOrder(nullptr) will have the same effect.  [1]
// TODO: This was used more broadly, but no longer, so we may be able to simplify the signatures of the virtual functions.  [1]
// TODO: switch all uses if isspace() etc. to this once tested well  [1]
// TODO: This does not seem to work well, most places use wtocharpath() instead. Maybe we can remove this.  [1]
let trainLossSinceLastLogged = epochCriterionSinceLastLogged.Average(); // TODO: Check whether old trainSamplesSinceLastLogged matches this ^^ difference  [1]
// TODO: Noone else overrides this method. So is this the right mechanism?  [1]
} // TODO: no, use our own time measurement  [1]
# So we emulate those dimensions on this level. TODO: Once this is suppored by the C++ code, remove the emulation here.  [2]
# TODO temporary  [1]
// - transition scores: square transition matrix, --TODO: log?  [1]
// TODO: make this proper C++ functions with variadic templates and a name that reflects their difference to fprintf(stderr) which already implies printing to log  [1]
// ... TODO: rethink the resize business. It is for shrinking only.  [2]
if (transcript[0].firstframe != 0) // TODO: should we store the #frames instead? Then we can validate the total duration  [1]
return numAllSamples; // TODO: Return the actual number of samples, by inquiring our own input nodes; then eliminate the numAllSamples parameter.  [1]
// TODO: We could do this on the GPU, but for now C++ is easier.  [1]
# TODO: Per discussion with sayanp, the underlying C++ code is not fully functional, so this  [1]
// TODO: Should this add a tensor dimension?  [1]
// TODO: make the following methods also private in Java, after CreateBatch/CreateSequence/... methods are implemented there.  [1]
// TODO: the following checks should throw, but I don't dare in case this will crash a critical job... if we never see this warning, then  [1]
// TODO: if it is an identity op, we shall peek its input node to find the correct tensor element type.  [1]
// TODO: change this ^^ to the const & version of Apply() once it is there  [1]
// TODO: in SimpleNetworkBuilder, this is very often followed by InitLearnableParameter()--we should have an overload that just does it right away  [1]
// TODO: NormalGrad is a misnomer here. Come up with a better name.  [2]
# TODO: consider using cntk.ops.one_hot instead of _index_to_vector  [1]
// TODO: how many of these do we have? labelInfoIn, Min, Out, Max, and there must be exactly 2?  [1]
// TODO: Extract an interface.  [1]
// TODO: Should we prepare the CSC data directly on the CPU and move it in one go?  [1]
# TODO: dim should be inferred from the file, at least for dense  [1]
if (configParam.Exists("learningRateMultiplier")) // TODO: should this be a test for 'true' rather than Exists()?  [1]
else if (arg.Is()) // TODO: should have its own ToString() method  [1]
// TODO (this does not really break it since for full matrices, class Matrix will resize by itself)  [1]
// TODO: separate into nodes that have inputs and those that duplicate functions with input adding except just not adding inputs. Clear?  [1]
ElemType stddevs = 4.0f; // TODO: make this a parameter  [1]
// TODO: Input variables currently are required to have the default batch axis  [1]
// TODO: avoid hardcoded values  [1]
// TODO: ONNX data types other than float and double are  [1]
// TODO: Possibly expose a limiting counter on the number of samples for validation.  [1]
// TODO: We are reusing the aggregation logic inside SimpleDistGradAggregator, which has a heavy dependency  [1]
# TODO: conflict of parameter order: filter_shape or num_filters first?  [1]
for (typename map*>::iterator p = mReader.begin(); p != mReader.end(); p++) // TODO: range-based for  [1]
//TODO: test code for linking with mkldnn.dll, will extend to support dilated convolution with MKL-DNN later  [1]
assert(false); // TODO: implement later  [1]
#else // This does not actually work. --TODO: find out why  [1]
// TODO: why is this a class, and not just a procedure? Then we wouldn't have to include the massive header  [1]
// TODO Remove m_pMBLayoutOfNetwork altogether. See issue 358.  [1]
// TODO: uncomment this code once LotusRT output shape matches ONNX  [1]
m_gradType.targetAdagradAvDenom = configSGD(L"fsAdagradTargetAvDenom", 1.0); // TODO: deprecated parameter kept for back compat (set to 0.0025 inconjunction with reenabling the static bug)  [1]
ElemType v = gradientValues.Get00Element() / (functionValues.Get00Element() + EPS_IN_INVERSE); // TODO: GPU inefficiency  [1]
// TODO: change those strings into wstrings to avoid this conversion mess  [1]
const size_t j14 = j1 & ~3; // ... TODO: put this back--when stuff works again  [2]
// TODO: merge with the code above, but we first need to get the nbrUttPerMinibatch initialized inside each reader  [1]
// TODO: validate whether the passed matrices set matches reader section definitions  [1]
// TODO: move this into shim for the old readers.  [1]
// TODO: need to go down to all levels, maybe search for sectionType  [2]
// The forms that infer the dimensions have different BrainScript names. TODO: need one for fromFile  [1]
typedef std::map NamedParams; // TODO: maybe even not use a typedef, just use the type  [1]
template // TODO: needed?  [1]
#include "TrainingNodes.h" // TODO: we should move the functions that depend on these to the .cpp  [1]
// TODO: We should observe if these actually make a speed difference, and if not, remove these special cases.  [1]
else if (std::find(strType.begin(), strType.end(), L"CLASSLM") != strType.end()) // TODO: camelCase  [1]
// TODO: should this be a static member of above class?  [1]
// TODO: Rename to Exports.cpp  [1]
// TODO: Remove type from the parser. Current implementation does not support streams of different types.  [1]
// TODO: this should be bool. Change when config per deserializer is allowed.  [1]
// TODO: to figure out sparse matrix size  [2]
#include "ComputationNetworkBuilder.h" // TODO: We should only pull in NewComputationNodeFromConfig(). Nodes should not know about network at large.  [2]
return; // no need to do anything if already initialized. TODO: make it singleton  [1]
// TODO: change to use STL vector instead  [1]
// TODO: the check for NeedsDynamicValidation() is a temporary resolution and needs to be properly handled when we look at support for free dimension convolution inputs.  [1]
TODO: Investigate to remove it.  [1]
// TODO: we should pass the expression name to construct() as well  [1]
// TODO: change these to take an actual object instead of a name for dynamicAxis  [1]
// TODO: Try removing this branch. May not be needed after batch dimension fix.  [1]
// TODO: investigate performance of implementation, function signature and efficiency  [1]
UniqueSequenceId seqId; // unique sequence id (or GAP_SEQUENCE_ID--TODO: don't include gaps here)  [1]
// TODO: add a test case for this code path.  [1]
// TODO: this should be a bulk operation; this implementation is a quick hack  [1]
// only one criterion so far TODO: support multiple ones?  [1]
// TODO: This implementation takes a lot of stack space. Should break into many sub-functions.  [1]
const auto &hmm = hset.gethmm(unit.unit); // TODO: inline these expressions  [1]
// hide new so only stack allocated --TODO: Why do we care?  [1]
// TODO: need to know whether we want to see '\n' or not  [1]
// TODO: how to handle cases where batch_size is not 1?  [1]
} // TODO: check if correct  [1]
// TODO: The set methods should be offered in template from  [1]
// TODO: Do we need an Unregister to unload the module?  [1]
// TODO: In case when the network requires less inputs,  [1]
// TODO: giving up moving stuff for now, running out of time. The following #includes should not be necessary once the hard-working code in here gets moved to .cpp  [1]
// ... TODO: while (IsWhiteSpace (c)) c = fgetc (f); // skip trailing space  [2]
RuntimeError("Input label expected to be a category label"); // TODO: ensure this at config time (maybe keep an assert() here as a reminder to the reader)  [1]
// ... TODO (?): return true/false to indicate whether anything changed.  [2]
#pragma omp parallel for // TODO: Depending in circumstance, it may be more efficient to parallelize over rows.  [1]
y = temp; // TODO: ::swap(x,y)?  [1]
// TODO: randomizer to collect how many copies each transform needs and request same sequence several times.  [1]
#define ON_CUDA 0 // TODO: this does not work for some combination--fix this  [1]
float m_learningRateMultiplier; // update parameters? Only used for LearnableParameters. --TODO: Should we make this a member of LearnableParameters actually? And require a type cast? Currently it is read out for all leaves.  [1]
// TODO: is this really efficient? One thread per value?  [1]
// TODO: We should be able to configure IO chunks based on size.  [1]
ResetEvalTimeStamps(); // invalidate all m_value fields --TODO: redundant (called over again for every root node). Make this private and only call for sets of nodes.  [1]
extern void operator||(cudaError_t rc, const char *msg); // TODO: imported from cudamatrix.cpp --better move to cudalib.h  [1]
// TODO: Waiting Skype smart reply with attention model  [2]
// TODO: Output stream descriptions - this should come from the network so that we can check  [1]
// TODO: Would it be sufficient to check against our own time stamp, so that we can use a unified time-stamping mechanism? Then we'd not need this special check for delayed nodes; just check all inputs against our own time stamp.  [1]
m_pMBLayout->Init(requestedMBSize, 0); // (SGD will ask before entering actual reading --TODO: This is hacky.)  [1]
ElemType* arr = m_GPUMatrix->CopyToArray(); // TODO: unnecessary allocation/copy; why not make this a vector that we move over as an rvalue ref?  [1]
// TODO: more details when tracing warnings  [1]
protected: // TODO: should be fully encapsulated here  [1]
// TODO: Use using and virtual inheritance after switching to VS2015.  [1]
// TODO: This should use DataFor(). But for that, DataFor() will have to move out from ComputationNode. Ah, it has!  [1]
if (mFile == nullptr) // TODO: why check here and not when it is being opened?  [1]
// TODO: is copy really necessary here?  [1]
/* FIXME We would love to do the following, but the hashing does not  [2]
// TODO most of the time, we can advance to the right sequence here  [1]
return Constant(0) # note: don't pass None to past_value, because that would default to float32 --TODO: still the case?  [1]
if (!_wcsicmp(randomizeString.c_str(), L"none")) // TODO: don't support case-insensitive option strings in the new reader  [1]
// TODO: we can optimize for 1 bit here - very simply use a template arg 'isonebit'  [1]
// TODO: merge these two types  [1]
# FIXME figure out how to only SKIP the doctest in CPU  [1]
public: // TODO: make private (currently used from masking and DataFor) ; TODO: rename all members with m_ prefix  [1]
// TODO support multiplication on GPUs as well.  [1]
// TODO: This can be done more nicely. We should keep the block structure.  [1]
childrenInThisLoop, childrenInOuterLoop; // TODO: think through what these mean when coming from PAR mode  [1]
// ... TODO: when I make this public, LinearTransform.h acts totally up but I cannot see where it comes from.  [1]
// TODO: all logging functionality should be refactored to live in a logging utility class.  [1]
// TODO: This must be a runtime check, not an assert().  [2]
double learnRatePerSample = 1.0f / 8.0f / 0.618f / sqrt((double) m_mbSize[epochNumber]); // TODO: comment on these magic constants  [1]
// TODO: This constructor should be deprecated in the future. Compositional config should be used instead.  [1]
// TODO: This is very close to the planned SpliceNode (just make m_spliceDim actually configurable) except for splicing along time.  [1]
// TODO: We should avoid this copy but that requires carefully managing the  [2]