欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

浅浅理解Kaldi中的IO流

程序员文章站 2022-04-11 17:28:38
...

理解Kaldi中的IO流

1 Read

base/io-funcs.cc

// Reads one whitespace-delimited token from `is` into `*str`, then consumes
// the single whitespace character that follows it.
//
//   is      stream to read from.
//   binary  when false, leading whitespace is skipped explicitly first.
//   str     output; must not be NULL.
//
// A failed extraction, or a token that is not followed by a whitespace
// character, is reported through KALDI_ERR.
void ReadToken(std::istream &is, bool binary, std::string *str) {
  KALDI_ASSERT(str != NULL);

  if (!binary) {
    is >> std::ws;  // skip leading whitespace in text mode.
  }

  is >> *str;
  if (is.fail()) {
    KALDI_ERR << "ReadToken, failed to read token at file position "
              << is.tellg();
  }

  // The token must be terminated by whitespace; the stream is peeked again
  // inside the message rather than caching the first result.
  const bool followed_by_space = (isspace(is.peek()) != 0);
  if (!followed_by_space) {
    KALDI_ERR << "ReadToken, expected space after token, saw instead "
              << CharToString(static_cast<char>(is.peek()))
              << ", at file position " << is.tellg();
  }

  is.get();  // drop the terminating whitespace character.
}

// Reads the next token from `is` and checks that it equals `token`.
//
//   is      stream to read from.
//   binary  when false, leading whitespace is skipped explicitly first.
//   token   expected token; must not be NULL and must pass CheckToken().
//
// A read failure or a mismatching token is reported through KALDI_ERR.
void ExpectToken(std::istream &is, bool binary, const char *token) {
  // Keep the start position as a std::streamoff: narrowing it to int would
  // truncate the position reported below for inputs larger than 2 GiB.
  const std::streamoff pos_at_start = static_cast<std::streamoff>(is.tellg());
  KALDI_ASSERT(token != NULL);
  CheckToken(token);  // make sure it's valid (can be read back)
  if (!binary) is >> std::ws;  // consume whitespace.
  std::string str;
  is >> str;
  is.get();  // consume the space.
  // Checked only after get(): a token that ends exactly at end-of-stream,
  // with no character after it, is therefore also reported as a failure.
  if (is.fail()) {
    KALDI_ERR << "Failed to read token [started at file position "
              << pos_at_start << "], expected " << token;
  }
  // The second half of the '&&' expression below is so that if we're expecting
  // "<Foo>", we will accept "Foo>" instead.  This is so that the model-reading
  // code will tolerate errors in PeekToken where is.unget() failed; search for
  // is.clear() in PeekToken() for an explanation.
  if (strcmp(str.c_str(), token) != 0 &&
      !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) {
    KALDI_ERR << "Expected token \"" << token << "\", got instead \""
              << str <<"\".";
  }
}

base/io-funcs-inl.h

// Template that covers integers.
template<class T> inline void ReadBasicType(std::istream &is,
                                            bool binary, T *t) {
  KALDI_PARANOID_ASSERT(t != NULL);
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (binary) {
    int len_c_in = is.get();
    if (len_c_in == -1)
      KALDI_ERR << "ReadBasicType: encountered end of stream.";
    char len_c = static_cast<char>(len_c_in), len_c_expected
      = (std::numeric_limits<T>::is_signed ? 1 :  -1)
      * static_cast<char>(sizeof(*t));
    if (len_c !=  len_c_expected) {
      KALDI_ERR << "ReadBasicType: did not get expected integer type, "
                << static_cast<int>(len_c)
                << " vs. " << static_cast<int>(len_c_expected)
                << ".  You can change this code to successfully"
                << " read it later, if needed.";
      // insert code here to read "wrong" type.  Might have a switch statement.
    }
    is.read(reinterpret_cast<char *>(t), sizeof(*t));
  } else {
    if (sizeof(*t) == 1) {
      int16 i;
      is >> i;
      *t = i;
    } else {
      is >> *t;
    }
  }
  if (is.fail()) {
    KALDI_ERR << "Read failure in ReadBasicType, file position is "
              << is.tellg() << ", next char is " << is.peek();
  }
}

结合这两个文件,再结合nnet-computation.cc中的Read方法,这里写个demo:

int main(void){
	int32 valueA,valueB;
	bool binary;
	Input ki("demo.bin", &binary);
	std::istream& is = ki.Stream();
	ExpectToken(is, binary, "<ValueA>");
	ReadBasicType(is, binary, &valueA);
	ExpectToken(is, binary, "<ValueB>");
	ReadBasicType(is, binary, &valueB);
	std::cout<<"valueA="<<valueA<<" "<<"valueB="<<valueB<<std::endl;
}

在io-funcs-inl.h中有其他Read模板,可以借鉴。

2 Write

结合Read的参考文件,这里也可以写个Write的demo:

int main(void){
	int32 valueA,valueB;
	bool binary=true;
	Output ko("demo.bin", binary);
	std::ostream& os = ko.Stream();
	WriteToken(os, binary, "<ValueA>");
	WriteBasicType(os, binary, valueA);
	WriteToken(os, binary, "<ValueB>");
	WriteBasicType(os, binary, valueB);
}
相关标签: 语音