Skip to content
Snippets Groups Projects
Commit ff695b9b authored by Juan Manuel Carmona Loaiza's avatar Juan Manuel Carmona Loaiza :ghost:
Browse files

Corrections to last pull request

parent b8cffa22
No related branches found
No related tags found
No related merge requests found
......@@ -65,14 +65,15 @@ OutputData<double>* OutputDataReadNumpyTXTStrategy::readOutputData(std::istream&
}
if(nrows < 2){
return ArrayUtils::createData1D(data[0]).release();
return ArrayUtils::createData1D(std::move(data[0])).release();
}
else if(ncols < 2){
std::vector<double> vector1d;
for(size_t i = 0; i < data.size(); i++){
vector1d.push_back(data[i][0]);
const size_t size = data.size();
std::vector<double> vector1d(size);
for(size_t i = 0; i < size; ++i){
vector1d[i] = data[i][0];
}
return ArrayUtils::createData1D(vector1d).release();
return ArrayUtils::createData1D(std::move(vector1d)).release();
}
else{
return ArrayUtils::createData2D(data).release();
......
......@@ -32,9 +32,7 @@ bool csv::isAscii(QString filename)
return true;
// TODO
// This function needs to be defined properly;
// For the moment it always evaluates to 'true',
// as there was a problem with a file containing
// ° and Å characters
// motivation: ° and Å characters are problematic.
char c;
unsigned count = 0;
unsigned count_bad = 0;
......@@ -47,31 +45,7 @@ bool csv::isAscii(QString filename)
is.close();
double acceptance_threshold = 0.1f * double(count);
//std::cout << count << "; " << count_bad << std::endl;
//After trying with some binary files
//we get the following numbers:
//count; count_bad
//1000; 46
//1000; 47
//1000; 42
//143; 17
//1000; 46
//1000; 44
//1000; 45
//1000; 42
//1000; 104
//159; 33
//1000; 152
//1000; 135
//1000; 49
//1000; 129
//305; 64
//The solution is not straightforward.
//What about files with 100 Cyrillic characters out of 1000?
if(double(count_bad) > acceptance_threshold){
//return false;
return false;
}
return true;
return static_cast<double>(count_bad) <= acceptance_threshold;
}
CsvImportAssistant::CsvImportAssistant(const QString& file, const bool useGUI, QWidget* parent):
......
......@@ -26,50 +26,10 @@
//! Helpers for CSV handling: two string-container aliases and a file check.
namespace csv{
//! Nested container: a vector whose elements are vectors of strings.
typedef std::vector<std::vector<std::string>> DataArray ;
//! A single vector of strings (presumably one row of a DataArray -- confirm at call sites).
typedef std::vector<std::string> DataRow;
/*
Background notes: a byte-level heuristic for deciding whether a file is text or binary.

If a file contains only the decimal bytes 9–13, 32–126, it's probably a pure ASCII text file.
Otherwise, it's not. However, it may still be text in another encoding.
If, in addition to the above bytes, the file contains only the decimal bytes 128–255,
it's probably a text file in an 8-bit or variable-length ASCII-based encoding such as ISO-8859-1,
UTF-8 or ASCII+Big5. If not, for some purposes you may be able to stop here and consider the
file to be binary. However, it may still be text in a 16- or 32-bit encoding.
If a file doesn't meet the above constraints, examine the first 2–4 bytes of the file for a
byte-order mark:
If the first two bytes are hex FE FF, the file is tentatively UTF-16 BE.
If the first two bytes are hex FF FE, and the following two bytes are not hex 00 00,
the file is tentatively UTF-16 LE.
If the first four bytes are hex 00 00 FE FF, the file is tentatively UTF-32 BE.
If the first four bytes are hex FF FE 00 00, the file is tentatively UTF-32 LE.
If, through the above checks, you have determined a tentative encoding, then check only for the
corresponding encoding below, to ensure that the file is not a binary file which happens to match
a byte-order mark.
If you have not determined a tentative encoding, the file might still be a text file in one of these
encodings, since the byte-order mark is not mandatory, so check for all encodings in the following
list:
If the file contains only big-endian two-byte words with the decimal values 9–13, 32–126, and 128 or above, the file is probably UTF-16 BE.
If the file contains only little-endian two-byte words with the decimal values 9–13, 32–126, and 128 or above, the file is probably UTF-16 LE.
If the file contains only big-endian four-byte words with the decimal values 9–13, 32–126, and 128 or above, the file is probably UTF-32 BE.
If the file contains only little-endian four-byte words with the decimal values 9–13, 32–126, and 128 or above, the file is probably UTF-32 LE.
If, after all these checks, you still haven't determined an encoding, the file isn't a text file
in any ASCII-based encoding I know about, so for most purposes you can probably consider it to be binary
(it might still be a text file in a non-ASCII encoding such as EBCDIC, but I suspect that's
well outside the scope of your concern).
*/
//! Takes a file name and returns a bool.
//! NOTE(review): only the declaration is visible here; presumably it reports whether the
//! named file looks like ASCII text -- confirm against the definition how much of the
//! heuristic described above is actually implemented.
bool isAscii(QString filename);
}
//! Logic for importing intensity data from csv files
class BA_CORE_API_ CsvImportAssistant: public QObject
{
Q_OBJECT
......
......@@ -31,8 +31,8 @@ namespace
{
const QString filter_string_ba = "Intensity File (*.int *.gz *.tif *.tiff *.txt *.csv);;"
"Other (*.*)";
const QString filter_string_ascii = "Ascii columnwise data (*.txt *.csv *.dat *.ascii);;"
"Other ascii file extensions (*.*)";
const QString filter_string_ascii = "Intensity File (*.int *.int.gz *.txt *.csv *.dat *.ascii);;"
"Ascii column-wise data (*.*)";
int getRank(const RealDataItem& item)
{
......@@ -50,7 +50,7 @@ std::unique_ptr<OutputData<double>> ImportDataUtils::ImportKnownData(QString& fi
try {
std::unique_ptr<OutputData<double>> data(
IntensityDataIOFactory::readOutputData(fileName.toStdString()));
result = CreateSimplifiedOutputData(*data.get());
result = CreateSimplifiedOutputData(*data);
} catch(std::exception& ex)
{
QString message = QString("Error while trying to read file\n\n'%1'\n\n%2")
......@@ -104,8 +104,7 @@ ImportDataInfo ImportDataUtils::Import1dData(QString& baseNameOfLoadedFile)
DataFormatUtils::isTiffFile(fileName.toStdString())
){
try{
ImportDataInfo result(ImportKnownData(fileName),AxesUnits::NBINS);
return result;
return ImportDataInfo(ImportKnownData(fileName), AxesUnits::NBINS);
}
catch(...){
return getFromImportAssistant(fileName);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment