文件解析之CSV
程序员文章站
2024-03-21 14:45:04
...
最近在一个项目中用到了CSV文件,为此实现了一个CSV文件解析类,代码如下:
upub.h:
#pragma once
using namespace std;
#include <iostream>
#include <fstream>
#include <sstream>
#include <deque>
#include <vector>
#include <stack>
#include <string>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#define FLOAT_MAX (3.4e+38)
#define FLOAT_MIN (3.4e-38)
#define DOUBLE_MAX (1.7e+308)
#define DOUBLE_MIN (1.7e-308)
/* Type tag carried by ussvar (its vt field). The enumerator names parallel
   the members of ussvar's value union (bol / i32 / u32 / dou). */
typedef enum ussvtype {
    USS_VTYPE_NULL   = 0,
    USS_VTYPE_BOOL   = 1,
    USS_VTYPE_INT32  = 2,
    USS_VTYPE_UINT32 = 3,
    USS_VTYPE_DOUBLE = 4
} ussvtype;
/* A value paired with a ussvtype tag. All representations share the same
   storage through the val union.
   NOTE(review): presumably vt tells which member of val is meaningful;
   nothing in this header enforces that. */
typedef struct ussvar {
    ussvtype vt;
    union {
        bool         bol;
        int          i32;
        unsigned int u32;
        double       dou;
    } val;
} ussvar;
ucsv.h:
#include "upub.h"
#pragma once
// using namespace std;
#include <iostream>
#include <fstream>
#include <sstream>
#include <deque>
#include <vector>
#include <stack>
#include <string>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#define FLOAT_MAX (3.4e+38)
#define FLOAT_MIN (3.4e-38)
#define DOUBLE_MAX (1.7e+308)
#define DOUBLE_MIN (1.7e-308)
// One CSV row held as a list of cell strings.
// The std:: qualification keeps this header compiling even when no
// file-scope `using namespace std;` is in effect at this point.
typedef std::vector<std::string> ROWVEC;
// Values stored in ufilecsv::m_nFileState.
enum
{
    FILE_STATE_NULL  = 0,   // set by open() before it opens anything, and by close()
    FILE_STATE_READ  = 1,   // set by open() when bIsRead is true
    FILE_STATE_WRITE = 2    // set by open() when bIsRead is false
};
class ufilecsv
{
public:
ufilecsv();
~ufilecsv();
bool open(bool bIsRead, const char* strPath, const char* strFilename);
bool close();
bool setio();
bool sethead(const char* head);
// 读接口
template<class T>
bool read(const char* strFieldName, T& data)
{
if (m_nFileState != FILE_STATE_READ)
{
return false;
}
int n = findfield(strFieldName);
if (n == -1 || n >= m_CSVCurRow.size())
{
return false;
}
/*can't support string which contains space, so add readstring()*/
stringstream ss;
ss << m_CSVCurRow[n];
ss >> data;
return true;
}
// 写接口
template<class T>
bool write(const char* strFieldName, T data)
{
if (m_nFileState != FILE_STATE_WRITE)
{
return false;
}
int n = findfield(strFieldName);
if (n == -1)
{
return false;
}
/*can't support string which contains space, so add writestring()*/
stringstream ss;
ss << data;
m_CSVCurRow[n] = ss.str();
return true;
}
bool readstring(const char* strFieldName, char* data, int size)
{
if (m_nFileState != FILE_STATE_READ)
{
return false;
}
int n = findfield(strFieldName);
if (n == -1 || n >= m_CSVCurRow.size())
{
return false;
}
strncpy(data, m_CSVCurRow[n].data(), size-1);
return true;
}
bool writestring(const char* strFieldName, char* data, int size)
{
if (m_nFileState != FILE_STATE_WRITE)
{
return false;
}
int n = findfield(strFieldName);
if (n == -1)
{
return false;
}
m_CSVCurRow[n] = data;
return true;
}
private:
void readhead();
void parserow(const char* strRow, int nSize, ROWVEC& result);
int findfield(const char* strRow);
private:
int m_nFileState;
fstream m_CSVFile;
ROWVEC m_CSVHead;
ROWVEC m_CSVCurRow;
char m_headline[4096];
};
ucsv.cpp:
#include "stdafx.h"
#include "ucsv.h"
// Converts a UTF-8 encoded std::string to a std::wstring using
// MultiByteToWideChar(CP_UTF8).
// Throws std::exception when the conversion fails.
std::wstring Utf82Unicode(const std::string& utf8string)
{
    // First pass: query the required length in wchar_t units. Passing -1 as
    // the source length makes the count include the terminating NUL.
    const int widesize = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, NULL, 0);
    // A zero return is the failure signal. The length must NOT be compared
    // with ERROR_NO_UNICODE_TRANSLATION: that constant is a GetLastError()
    // code (1113), so a valid result of exactly that many units would be
    // misreported as an invalid sequence.
    if (widesize <= 0)
    {
        throw std::exception("Error in conversion.");
    }
    // Second pass: convert into a buffer of exactly the reported size.
    std::vector<wchar_t> resultstring(widesize);
    const int convresult = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, &resultstring[0], widesize);
    if (convresult != widesize)
    {
        throw std::exception("La falla!");
    }
    // The buffer is NUL-terminated (see above), so construct from the pointer.
    return std::wstring(&resultstring[0]);
}
// Replaces every occurrence of old_str in src_str with new_str, scanning left
// to right. Text inserted by a replacement is not scanned again.
// Returns the number of replacements made. An empty old_str matches nothing:
// src_str is left untouched and 0 is returned (searching for an empty string
// would otherwise match at every position and never terminate).
int replace(std::string& src_str, const std::string& old_str, const std::string& new_str)
{
    if (old_str.empty())
    {
        return 0;
    }
    int count = 0;
    // Keep the position in the string's own size type so the comparison with
    // npos is exact for any string length.
    std::string::size_type pos = 0;
    while ((pos = src_str.find(old_str, pos)) != std::string::npos)
    {
        src_str.replace(pos, old_str.length(), new_str);
        pos += new_str.length();   // continue after the inserted text
        ++count;
    }
    return count;
}
// Starts with no file open and no caller-supplied header line.
// m_nFileState must be initialised here: findfield(), read(), write() and
// setio() all test it, and may be called before open() has assigned it.
ufilecsv::ufilecsv()
    : m_nFileState(FILE_STATE_NULL)
{
    memset(m_headline, 0, sizeof(m_headline));
}
// No explicit cleanup is performed here; the members are destroyed by their
// own destructors.
ufilecsv::~ufilecsv() {}
// Consumes the first line of m_CSVFile and fills m_CSVHead with the column
// names. The line stored by sethead() is parsed instead of the file's line
// when it is non-empty and differs from what the file contains.
void ufilecsv::readhead()
{
    char fileLine[4096];
    m_CSVFile.getline(fileLine, sizeof(fileLine));

    const bool bHaveStoredHead = (m_headline[0] != 0);
    if (!bHaveStoredHead || strcmp(m_headline, fileLine) == 0)
    {
        // No override, or the override matches the file: use the file's line.
        parserow(fileLine, sizeof(fileLine), m_CSVHead);
        return;
    }
    parserow(m_headline, sizeof(m_headline), m_CSVHead);
}
// Splits one CSV line into cells and stores them in result.
//
// strRow  - the line to split; scanning stops at the first '\0'.
// nSize   - upper bound on the number of characters examined.
// result  - cleared first, then receives one string per cell.
//
// Rules implemented below:
//  * A comma outside quotes ends the current cell (empty cells are kept).
//  * A double quote opens a quoted section; commas inside it are literal.
//  * Inside a quoted section, two consecutive quotes yield one literal quote;
//    a single quote closes the section. The enclosing quotes are not stored.
//  * '?' characters and characters whose char value is <= 0 are dropped
//    (where char is signed this covers every byte >= 0x80).
//  * The last cell is stored only if it is non-empty. A line ending right
//    after a comma gets a last cell consisting of a single space.
// NOTE(review): a quoted empty cell ("") at the very end of a line leaves the
// pending cell empty, so it is not pushed and the row ends up one cell short.
void ufilecsv::parserow(const char* strRow, int nSize, ROWVEC& result)
{
result.clear();
// bIsInWord: currently inside a quoted section.
bool bIsInWord = false;
// bIsHaveSpace: set together with bIsInWord when a quote opens; cleared when
// a cell ends on a comma.
bool bIsHaveSpace = false;
// Text of the cell being accumulated.
string strCurWorld;
for (int i=0; i<nSize; i++)
{
char ch = strRow[i];
if (ch == '\0')
{
// End of line. If it ends right after a comma, make the pending cell a single
// space so the non-empty test after the loop still pushes a final cell.
if (i >= 1 && strRow[i-1] == ',')
{
strCurWorld = ' ';
}
break;
}
// Whether ch is appended to the current cell at the end of this iteration.
bool bIsAdd = true;
if (ch<=0)
{
bIsAdd=false;
}
switch (ch)
{
case ',':
{
// Inside quotes the comma falls through with bIsAdd still true and is kept.
if (!bIsInWord)
{
// end of one cell
result.push_back(strCurWorld);
bIsInWord = false;
bIsHaveSpace = false;
strCurWorld = "";
bIsAdd = false;
}
}
break;
case '"':
{
if (!bIsInWord)
{
// Opening quote: enter the quoted section, do not store the quote.
bIsInWord = true;
bIsHaveSpace = true;
bIsAdd = false;
}
else
{
// NOTE(review): reads strRow[i+1]; this stays in bounds only if the buffer
// holds a '\0' before index nSize - confirm for all callers.
if ('"' == strRow[i+1])
{
// Doubled quote: skip the second one; ch (a quote) is appended below.
i++;
}
else if (bIsHaveSpace)
{
// Closing quote: leave the quoted section, do not store the quote.
bIsInWord = false;
bIsAdd = false;
}
else
{
}
}
}
break;
case '?':
// Question marks are never stored.
bIsAdd=false;
break;
default:
//bIsInWord = true;
break;
};
if (bIsAdd)
{
strCurWorld += ch;
}
}
// Push whatever is left as the last cell, unless it is empty.
if (strCurWorld != "")
{
result.push_back(strCurWorld);
}
}
int ufilecsv::findfield(const char* strRow)
{
if (m_nFileState == FILE_STATE_NULL)
{
return -1;
}
ROWVEC::iterator pStart = m_CSVHead.begin();
ROWVEC::iterator pEnd = m_CSVHead.end();
while(pStart != pEnd)
{
wstring arr = Utf82Unicode(*pStart);
CString csTemp;
csTemp=strRow;
CString csTemp1(arr.c_str());
if (!csTemp1.CompareNoCase(csTemp))
{
return int(pStart - m_CSVHead.begin());
}
pStart++;
}
return -1;
}
// Opens the file strPath + strFilename (concatenated as given, no separator
// is added) and reads its header line.
//
// bIsRead == true : the file is left open for reading, positioned after the
//                   header line; state becomes FILE_STATE_READ.
// bIsRead == false: the file is created if missing, its header is read, then
//                   it is reopened for appending; state becomes
//                   FILE_STATE_WRITE and setio() is called once with the
//                   header cells as the current row.
//                   NOTE(review): that call appends the header line to the
//                   file on every open-for-write - confirm this is intended.
//
// Returns false, leaving the state at FILE_STATE_NULL, when either argument
// is NULL or the file cannot be opened.
bool ufilecsv::open(bool bIsRead, const char* strPath, const char* strFilename)
{
    m_nFileState = FILE_STATE_NULL;
    if (strPath == NULL || strFilename == NULL)
    {
        return false;
    }

    // Detach any file left open by an earlier open() and drop stale error
    // flags, so this object can be reused.
    if (m_CSVFile.is_open())
    {
        m_CSVFile.close();
    }
    m_CSVFile.clear();

    std::string strFullFileName(strPath);
    strFullFileName += strFilename;

    if (!bIsRead)
    {
        // Opening in append mode creates the file when it does not exist, so
        // the read-open below can succeed on a brand-new file.
        m_CSVFile.open(strFullFileName.c_str(), std::ios_base::out | std::ios_base::app);
        if (m_CSVFile.is_open())
        {
            m_CSVFile.close();
        }
        m_CSVFile.clear();
    }

    m_CSVFile.open(strFullFileName.c_str(), std::ios_base::in);
    if (!m_CSVFile.is_open())
    {
        return false;
    }
    readhead();

    if (bIsRead)
    {
        m_nFileState = FILE_STATE_READ;
        return true;
    }

    m_CSVFile.close();
    // readhead() hits end-of-file on an empty file; clear the flags so they
    // cannot carry over into the append stream.
    m_CSVFile.clear();
    m_CSVFile.open(strFullFileName.c_str(), std::ios_base::out | std::ios_base::app);
    if (!m_CSVFile.is_open())
    {
        // State is still FILE_STATE_NULL, matching the failure return.
        return false;
    }
    m_nFileState = FILE_STATE_WRITE;
    m_CSVCurRow = m_CSVHead;
    setio();
    return true;
}
// Resets the state to FILE_STATE_NULL and closes the stream if it is open.
// Always returns true.
bool ufilecsv::close()
{
    m_nFileState = FILE_STATE_NULL;
    if (m_CSVFile.is_open())
    {
        m_CSVFile.close();
    }
    return true;
}
// Moves to the next row.
//
// Read mode : reads the next line from the file and parses it into the
//             current row. Returns false once no further line can be read.
// Write mode: writes every cell of the current row wrapped in double quotes
//             (embedded quotes doubled), comma-separated, followed by a
//             newline, and resets each cell to an empty string. Returns true.
// Otherwise : returns false.
bool ufilecsv::setio()
{
    if (m_nFileState == FILE_STATE_READ)
    {
        char strLine[4096];
        // Test the read itself rather than eof() beforehand. eof() only
        // becomes true after a read has already failed, so checking it first
        // yields one bogus empty row at the end of the file and loops forever
        // once a line longer than the buffer has set failbit.
        if (!m_CSVFile.getline(strLine, sizeof(strLine)))
        {
            return false;
        }
        parserow(strLine, sizeof(strLine), m_CSVCurRow);
        return true;
    }

    if (m_nFileState == FILE_STATE_WRITE)
    {
        for (ROWVEC::iterator it = m_CSVCurRow.begin();
             it != m_CSVCurRow.end(); ++it)
        {
            std::string str = *it;
            *it = "";                       // the cell starts empty for the next row
            replace(str, "\"", "\"\"");     // escape embedded quotes by doubling them
            m_CSVFile << '"' << str << '"';
            if (it + 1 != m_CSVCurRow.end())
            {
                m_CSVFile << ',';
            }
        }
        m_CSVFile << std::endl;
        return true;
    }

    return false;
}
// Stores head as the header line that readhead() prefers over the file's
// first line when the two differ.
// Returns false, leaving the stored line unchanged, when head is NULL or does
// not fit (with its terminator) into m_headline; returns true otherwise.
bool ufilecsv::sethead(const char* head)
{
    if (head == NULL)
    {
        return false;
    }
    const size_t len = strlen(head);
    if (len >= sizeof(m_headline))
    {
        // An unchecked copy would overrun the fixed-size member buffer.
        return false;
    }
    memcpy(m_headline, head, len + 1);   // +1 copies the terminating NUL
    return true;
}
读取文件示例:
// Loads the language table from the CSV file into m_langdatalist.
// Every row contributes one langdata built from the columns "type", "VID",
// "language" and "strvalue". Rows that lack any of these cells are skipped.
// Shows a message box and returns when the file cannot be opened.
void Clanguage::ReadF()
{
    CString Htxt[4]={_T("type"),_T("VID"),_T("language"),_T("strvalue")};
    CString filePath = _T("G:\\vs2010\\项目\\六面钻\\HiWCAM_V2(src_180730)\\src\\HiWCAM2\\Language\\");
    CString fileName = _T("language.txt");

    // Column names in the narrow-character form expected by ufilecsv.
    ROWVEC head;
    for (int i=0;i<4;i++)
    {
        head.push_back(WcharToChar(Htxt[i].GetBuffer()));
    }

    ufilecsv file;
    if (!file.open(true, WcharToChar(filePath.GetBuffer()), WcharToChar(fileName.GetBuffer())))
    {
        AfxMessageBox(_T("文件打开失败!"));
        return;
    }

    while (file.setio())
    {
        // One fixed-size buffer per column, on the stack, so nothing has to
        // be freed. Zero-filling each row keeps the last byte NUL, because
        // readstring() copies at most size-1 characters.
        char cell[4][128] = {{0}};

        // readstring() is bounded by the buffer size and copies the cell
        // verbatim, spaces included. read<char*>() extracts with no length
        // limit and stops at the first space.
        bool bRowOk = true;
        for (int i = 0; i < 4 && bRowOk; ++i)
        {
            bRowOk = file.readstring(head[i].c_str(), cell[i], static_cast<int>(sizeof(cell[i])));
        }
        if (!bRowOk)
        {
            // A blank or short line has no data for some column; do not
            // store a partially filled record.
            continue;
        }

        langdata ldata;
        ldata.type = atoi(cell[0]);
        // NOTE(review): the sizes of the langdata text members are not
        // visible here; confirm each can hold 128 bytes.
        strcpy(ldata.VID, cell[1]);
        strcpy(ldata.language, cell[2]);
        strcpy(ldata.strvalue, cell[3]);
        m_langdatalist.push_back(ldata);
    }
    file.close();
}