欢迎您访问程序员文章站,本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页

文件解析之CSV

程序员文章站 2024-03-21 14:45:04
...

最近做了一个使用CSV文件的项目,为此实现了一个CSV文件解析类,代码如下:

upub.h:

#pragma once

using namespace std;

#include <iostream>
#include <fstream>  
#include <sstream>  

#include <deque>
#include <vector>
#include <stack>

#include <string>  

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

// Rounded literals for the float / double magnitude range.
// NOTE(review): these are approximations, not the exact <cfloat> constants
// (FLT_MAX, FLT_MIN, DBL_MAX, DBL_MIN); 1.7e-308 in particular is smaller
// than the smallest normalized double -- confirm the rounding is acceptable
// wherever these are used.
#define FLOAT_MAX (3.4e+38)
#define FLOAT_MIN (3.4e-38)
#define DOUBLE_MAX (1.7e+308)
#define DOUBLE_MIN (1.7e-308)

// Type tag carried by a ussvar value.
// The numeric values are spelled out because they follow the declaration
// order of the original enumeration (0..4).
enum ussvtype {
	USS_VTYPE_NULL   = 0,
	USS_VTYPE_BOOL   = 1,
	USS_VTYPE_INT32  = 2,
	USS_VTYPE_UINT32 = 3,
	USS_VTYPE_DOUBLE = 4
};

// A small tagged value: a type tag plus a union holding the payload.
// NOTE(review): presumably `vt` tells which member of `val` is valid
// (BOOL -> bol, INT32 -> i32, UINT32 -> u32, DOUBLE -> dou) -- confirm
// against the code that fills these in.
struct ussvar {
    ussvtype vt;            // type tag

    union {
        bool         bol;
        signed int   i32;
        unsigned int u32;
        double       dou;
    } val;                  // payload storage shared by all alternatives
};

ucsv.h:

#include "upub.h"

#pragma once
// using namespace std;

#include <iostream>
#include <fstream>  
#include <sstream>  

#include <deque>
#include <vector>
#include <stack>

#include <string>  

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>

// Rounded literals for the float / double magnitude range.
// The same four macros, with identical replacement text, are also defined in
// upub.h, which this header includes.
// NOTE(review): the values are approximations of the <cfloat> constants
// (FLT_MAX, FLT_MIN, DBL_MAX, DBL_MIN) -- confirm the rounding is acceptable.
#define FLOAT_MAX (3.4e+38)
#define FLOAT_MIN (3.4e-38)
#define DOUBLE_MAX (1.7e+308)
#define DOUBLE_MIN (1.7e-308)


// One parsed CSV row: the cell texts in column order.
// Names are qualified so this header does not depend on a using-directive
// leaking in from another header.
typedef std::vector<std::string> ROWVEC;

// Access mode of a ufilecsv object.
enum
{
    FILE_STATE_NULL  = 0,   // no file is usable
    FILE_STATE_READ  = 1,   // opened for reading rows
    FILE_STATE_WRITE = 2    // opened for appending rows
};

class ufilecsv  
{  
public:  
    ufilecsv();
    ~ufilecsv();
    bool    open(bool bIsRead, const char* strPath, const char* strFilename);  
    bool    close();
    bool    setio();
    bool    sethead(const char* head);

    // 读接口  
    template<class T>  
    bool read(const char* strFieldName, T& data)  
    {  
        if (m_nFileState != FILE_STATE_READ)  
        {  
            return false;  
        }  
  
        int n   = findfield(strFieldName);  
        if (n == -1 || n >= m_CSVCurRow.size())  
        {  
            return false;  
        }  

        /*can't support string which contains space, so add readstring()*/
        stringstream ss;  
        ss << m_CSVCurRow[n];  
        ss >> data;  
  
        return true;  
    }  
  
    // 写接口  
    template<class T>  
    bool write(const char* strFieldName, T data)  
    {  
        if (m_nFileState != FILE_STATE_WRITE)  
        {  
            return false;  
        }  
  
        int n   = findfield(strFieldName);  
        if (n == -1)  
        {  
            return false;  
        }  
        
        /*can't support string which contains space, so add writestring()*/
        stringstream ss;  
        ss << data;  
        m_CSVCurRow[n]  = ss.str();  
  
        return true;  
    }  

    bool readstring(const char* strFieldName, char* data, int size)  
    {  
        if (m_nFileState != FILE_STATE_READ)  
        {  
            return false;  
        }  
  
        int n   = findfield(strFieldName);  
        if (n == -1 || n >= m_CSVCurRow.size())  
        {  
            return false;  
        }  
  
        strncpy(data, m_CSVCurRow[n].data(), size-1);
  
        return true;  
    }  

    bool writestring(const char* strFieldName, char* data, int size)  
    {  
        if (m_nFileState != FILE_STATE_WRITE)  
        {  
            return false;  
        }  
  
        int n   = findfield(strFieldName);  
        if (n == -1)  
        {  
            return false;  
        }  

        m_CSVCurRow[n]  = data;  
  
        return true;  
    }  

private:  
    void    readhead();  
    void    parserow(const char* strRow, int nSize, ROWVEC& result);  
    int     findfield(const char* strRow);  
private:  
    int         m_nFileState;  
    fstream     m_CSVFile;  
    ROWVEC      m_CSVHead;  
    ROWVEC      m_CSVCurRow;  

    char        m_headline[4096];
};  
  

ucsv.cpp:

#include "stdafx.h"
#include "ucsv.h"

//string转wstring
std::wstring Utf82Unicode(const std::string& utf8string)  
{  
	int widesize = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, NULL, 0);  
	if (widesize == ERROR_NO_UNICODE_TRANSLATION)  
	{  
		throw std::exception("Invalid UTF-8 sequence.");  
	}  
	if (widesize == 0)  
	{  
		throw std::exception("Error in conversion.");  
	}  
	std::vector<wchar_t> resultstring(widesize);  
	int convresult = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, &resultstring[0], widesize);  
	if (convresult != widesize)  
	{  
		throw std::exception("La falla!");  
	}  
	return std::wstring(&resultstring[0]);  
}  

// Replaces every non-overlapping occurrence of old_str in src_str with
// new_str, scanning left to right; text inserted by a replacement is never
// rescanned.
// Returns the number of replacements made. An empty old_str matches nothing
// and leaves src_str untouched (it would otherwise match at every position
// and never terminate).
int replace(std::string& src_str, const std::string& old_str, const std::string& new_str)
{
    if (old_str.empty())
    {
        return 0;
    }

    int count = 0;
    // Keep positions in the string's own size type so that npos and large
    // offsets are compared without truncation.
    std::string::size_type pos = 0;
    while ((pos = src_str.find(old_str, pos)) != std::string::npos)
    {
        src_str.replace(pos, old_str.length(), new_str);
        pos += new_str.length();
        ++count;
    }
    return count;
}

// Creates an accessor with no file attached and no header override.
ufilecsv::ufilecsv()
    : m_nFileState(FILE_STATE_NULL)   // otherwise read*/write*/setio would test an indeterminate state before open()
{
    memset(m_headline, 0, sizeof(m_headline));
}

// Destructor. The body is intentionally empty: the only resource held is the
// std::fstream member, whose own destructor closes the file.
ufilecsv::~ufilecsv()
{
    
}

// Reads the first line of the open file and fills m_CSVHead with the column
// names. A header line previously supplied through sethead() takes
// precedence whenever it is non-empty and differs from the line in the file.
void ufilecsv::readhead()
{
    char fileHeader[4096];
    m_CSVFile.getline(fileHeader, sizeof(fileHeader));

    const bool bUseOverride =
        (m_headline[0] != 0) && (strcmp(m_headline, fileHeader) != 0);

    if (bUseOverride)
    {
        parserow(m_headline, sizeof(m_headline), m_CSVHead);
        return;
    }

    parserow(fileHeader, sizeof(fileHeader), m_CSVHead);
}
  
// Splits one CSV line into its cells.
//
// strRow  - line text; parsing stops at the first NUL or after nSize chars.
// nSize   - number of characters that may be read from strRow.
// result  - receives the cells (cleared first).
//
// Rules:
//  * a comma outside double quotes ends the current cell;
//  * a double quote opens a quoted section, inside which commas are data and
//    a doubled quote ("") stands for one literal quote;
//  * a line ending right after a separator comma yields a final cell holding
//    a single space (legacy placeholder kept for compatibility);
//  * a final cell is emitted when it has content or was quoted, so a trailing
//    "" written by the writer is not lost;
//  * an entirely empty line yields no cells.
//
// NOTE(review): every character whose char value is <= 0 (all non-ASCII
// bytes where char is signed) and every '?' is discarded, even inside
// quotes. This looks like a byte-order-mark workaround; it is preserved
// here, but it also strips legitimate data -- confirm whether it is wanted.
void ufilecsv::parserow(const char* strRow, int nSize, ROWVEC& result)
{
    result.clear();

    bool        bInQuotes       = false;    // inside a "..." section
    bool        bFieldQuoted    = false;    // current cell had an opening quote
    bool        bAfterSeparator = false;    // last character consumed was a separator
    std::string strField;

    for (int i = 0; i < nSize; ++i)
    {
        const char ch = strRow[i];
        if (ch == '\0')
        {
            break;
        }

        if (ch == ',' && !bInQuotes)
        {
            // End of one cell.
            result.push_back(strField);
            strField.clear();
            bFieldQuoted    = false;
            bAfterSeparator = true;
            continue;
        }
        bAfterSeparator = false;

        if (ch == '"')
        {
            if (!bInQuotes)
            {
                // Opening quote: not part of the data.
                bInQuotes    = true;
                bFieldQuoted = true;
                continue;
            }

            // Look ahead only while still inside the caller's buffer.
            if (i + 1 < nSize && strRow[i + 1] == '"')
            {
                ++i;            // escaped quote: keep a single '"' below
            }
            else
            {
                bInQuotes = false;  // closing quote: not part of the data
                continue;
            }
        }
        else if (ch <= 0 || ch == '?')
        {
            continue;           // filtered characters, see NOTE above
        }

        strField += ch;
    }

    if (bAfterSeparator)
    {
        // Line ended right after a separator: legacy single-space cell.
        result.push_back(std::string(" "));
    }
    else if (!strField.empty() || bFieldQuoted)
    {
        result.push_back(strField);
    }
}
  
int ufilecsv::findfield(const char* strRow)  
{  
    if (m_nFileState == FILE_STATE_NULL)  
    {  
        return -1;  
    }  
	ROWVEC::iterator pStart = m_CSVHead.begin();
	ROWVEC::iterator pEnd = m_CSVHead.end();
	while(pStart != pEnd)
	{
		wstring arr = Utf82Unicode(*pStart);
		CString csTemp;
		csTemp=strRow;
		CString csTemp1(arr.c_str());

        if (!csTemp1.CompareNoCase(csTemp))  
        {  
            return int(pStart - m_CSVHead.begin());  
        }  
		pStart++;
    }  
  
    return -1;  
}  
  
  
// Opens the CSV file strPath + strFilename.
//
// bIsRead == true : the file must exist; its header row is read and the
//                   object enters read mode.
// bIsRead == false: the file is created if missing, the header is read from
//                   it (or taken from sethead()), the file is reopened for
//                   appending and the header cells are emitted as a row.
//
// Returns true on success. On any failure the object is left in the
// FILE_STATE_NULL state so that read*/write*/setio refuse to operate.
bool ufilecsv::open(bool bIsRead, const char* strPath, const char* strFilename)
{
    m_nFileState = FILE_STATE_NULL;

    if (strPath == NULL || strFilename == NULL)
    {
        return false;
    }

    // A stream that is still attached to a file cannot be opened again.
    if (m_CSVFile.is_open())
    {
        m_CSVFile.close();
    }

    std::string strFullFileName(strPath);
    strFullFileName += strFilename;

    if (!bIsRead)
    {
        // Opening in append mode creates the file when it does not exist yet.
        m_CSVFile.clear();
        m_CSVFile.open(strFullFileName.c_str(), std::ios_base::out | std::ios_base::app);
        if (m_CSVFile.is_open())
        {
            m_CSVFile.close();
        }
    }

    // Drop error flags left by earlier use; not every library resets them
    // on open().
    m_CSVFile.clear();
    m_CSVFile.open(strFullFileName.c_str(), std::ios_base::in);
    if (!m_CSVFile.is_open())
    {
        return false;
    }

    readhead();

    if (bIsRead)
    {
        m_nFileState = FILE_STATE_READ;
        return true;
    }

    // Write mode: switch the stream from reading to appending. Reading the
    // header of a new (empty) file leaves eof/fail set, hence the clear().
    m_CSVFile.close();
    m_CSVFile.clear();
    m_CSVFile.open(strFullFileName.c_str(), std::ios_base::out | std::ios_base::app);
    if (!m_CSVFile.is_open())
    {
        return false;
    }

    // Only report write mode once the stream is really usable.
    m_nFileState = FILE_STATE_WRITE;

    // NOTE(review): the header cells are emitted as a row on every write-mode
    // open, including when the file already starts with that header --
    // confirm this is intended.
    m_CSVCurRow = m_CSVHead;
    setio();

    return true;
}

// Detaches from the file: resets the state to FILE_STATE_NULL and closes the
// stream when it is open. Always returns true.
bool ufilecsv::close()
{
    m_nFileState = FILE_STATE_NULL;

    if (m_CSVFile.is_open())
    {
        m_CSVFile.close();
    }

    return true;
}
  
// Moves one row between the file and m_CSVCurRow.
//
// Read mode : reads the next line and parses it into m_CSVCurRow. Returns
//             false when no further line can be read.
// Write mode: writes the cells of m_CSVCurRow as one quoted CSV line, then
//             clears each cell for the next row. Returns false when the
//             stream reports an error.
// Any other state returns false.
bool ufilecsv::setio()
{
    if (m_nFileState == FILE_STATE_READ)
    {
        char strLine[4096] = {0};

        if (m_CSVFile.eof())
        {
            return false;
        }

        m_CSVFile.getline(strLine, sizeof(strLine));
        if (m_CSVFile.fail())
        {
            // Either nothing was left after the final newline, or the line
            // did not fit in the buffer. In both cases there is no valid row;
            // reporting success here would hand out a phantom row or, for an
            // over-long line, never reach end-of-file.
            m_CSVCurRow.clear();
            return false;
        }

        parserow(strLine, sizeof(strLine), m_CSVCurRow);
        return true;
    }

    if (m_nFileState == FILE_STATE_WRITE)
    {
        for (ROWVEC::iterator it = m_CSVCurRow.begin(); it != m_CSVCurRow.end(); ++it)
        {
            // Take the cell text and leave the cell empty for the next row.
            std::string str;
            str.swap(*it);

            // Embedded quotes are doubled; every cell is written quoted.
            replace(str, "\"", "\"\"");
            m_CSVFile << '"' << str << '"';

            if (it + 1 != m_CSVCurRow.end())
            {
                m_CSVFile << ',';
            }
        }

        // endl also flushes, so each row reaches the file as it is written.
        m_CSVFile << std::endl;

        return m_CSVFile.good();
    }

    return false;
}

// Stores a header line to be used by the next open() in place of the header
// found in the file (see readhead()).
// head must be a NUL-terminated string that fits in the internal buffer.
// Returns false, leaving the stored header unchanged, when head is null or
// too long.
bool ufilecsv::sethead(const char* head)
{
    if (head == NULL)
    {
        return false;
    }

    const size_t len = strlen(head);
    if (len >= sizeof(m_headline))
    {
        // An unchecked copy would write past the end of m_headline.
        return false;
    }

    memcpy(m_headline, head, len + 1);  // includes the terminating NUL
    return true;
}

读取文件示例:

// Loads the language table from language.txt into m_langdatalist.
// Each row is expected to provide the columns type, VID, language and
// strvalue; rows from which any of these cannot be read are skipped.
// Shows a message box and returns when the file cannot be opened.
void Clanguage::ReadF()
{
	ROWVEC head;
	CString Htxt[4]={_T("type"),_T("VID"),_T("language"),_T("strvalue")};
	ufilecsv file;
	CString filePath = _T("G:\\vs2010\\项目\\六面钻\\HiWCAM_V2(src_180730)\\src\\HiWCAM2\\Language\\");
	CString fileName = _T("language.txt");

	// Stack buffer: nothing to free on any exit path. It starts zeroed and
	// readstring() never writes the last byte beyond a terminator, so the
	// content is always a valid C string.
	char cArrTemp[128] = {0};

	for (int i=0;i<4;i++)
	{
		head.push_back(WcharToChar(Htxt[i].GetBuffer()));
	}

	if (!file.open(true, WcharToChar(filePath.GetBuffer()), WcharToChar(fileName.GetBuffer())))
	{
		AfxMessageBox(_T("文件打开失败!"));
		return;
	}

	while (file.setio())
	{
		langdata ldata;

		// readstring() is bounded by the buffer size and keeps embedded
		// spaces, unlike stream extraction into a char*.
		// NOTE(review): the capacities of ldata.VID / language / strvalue are
		// not visible here -- confirm each can hold 128 bytes.
		if (!file.readstring(head[0].c_str(), cArrTemp, int(sizeof(cArrTemp))))
		{
			continue;	// incomplete row: do not reuse the previous row's text
		}
		ldata.type=atoi(cArrTemp);

		if (!file.readstring(head[1].c_str(), cArrTemp, int(sizeof(cArrTemp))))
		{
			continue;
		}
		strcpy(ldata.VID, cArrTemp);

		if (!file.readstring(head[2].c_str(), cArrTemp, int(sizeof(cArrTemp))))
		{
			continue;
		}
		strcpy(ldata.language, cArrTemp);

		if (!file.readstring(head[3].c_str(), cArrTemp, int(sizeof(cArrTemp))))
		{
			continue;
		}
		strcpy(ldata.strvalue, cArrTemp);

		m_langdatalist.push_back(ldata);
	}
	file.close();
}

 

相关标签: CSV