Okay, here is the setup (derived from http://www.powertheshell.com/autodetecting-csv-delimiter/):
Edit:
Please see post #43 for the most up-to-date version of this code.
project/
- files/
- - colon.txt
- - comma.txt
- - mixture.txt
- - pipe.txt
- - pound.txt
- - semicolon.txt
- csv.php
- test.php
The files:
colon.txt
this:is:"a test":to:123:see:how:it:works
this: is: "a test": to: 123: see: how: it: works
123.:can?:you&:see:what:I'm:doing?:eight*:nine
comma.txt
this,is,"a test",to,123,see,how,it,works
this, is, "a test", to, 123, see, how, it, works
123.,can?,you&,see,what,I'm,doing?,eight*,nine
mixture.txt
this|is|"a test"|to|123|see|how|it|works
this; is; "a test"; to; 123; see; how; it; works
123.|can?|you&|see|what|I'm|doing?|eight*|nine
pipe.txt
this|is|"a test"|to|123|see|how|it|works
this| is| "a test"| to| 123| see| how| it| works
123.|can?|you&|see|what|I'm|doing?|eight*|nine
pound.txt
this#is#"a test"#to#123#see#how#it#works
this# is# "a test"# to# 123# see# how# it# works
123.#can?#you&#see#what#I'm#doing?#eight*#nine
semicolon.txt
this;is;"a test";to;123;see;how;it;works
this; is; "a test"; to; 123; see; how; it; works
123.;can?;you&;see;what;I'm;doing?;eight*;nine
csv.php
<?php
/**
 * Guesses the field delimiter used by a delimited text file.
 *
 * The file is read once, at construction. getDelimiter() then tallies the
 * candidate delimiter characters that occur outside double-quoted sections
 * and reports the most frequent one.
 */
class CSV
{
    /** Path of the file that was handed to the constructor. */
    private $filePath;

    /** Lines of the file as returned by file() (line endings included). */
    private $fileContents;

    /** PCRE pattern matching a single character that may act as a delimiter. */
    const ACCEPTABLE_DELIMITERS = '~[#,;:|]~'; // acceptable delimiters

    /**
     * Reads the file into memory.
     *
     * @param string $file Path of the file to inspect.
     */
    public function __construct($file)
    {
        $this->filePath = $file;

        // file() returns false (after raising a warning) when the file cannot
        // be read. Store "no lines" instead so getDelimiter() never iterates
        // or sorts a boolean.
        $lines = file($file);
        $this->fileContents = ($lines === false) ? array() : $lines;
    }

    /**
     * Determines the most likely delimiter of the file.
     *
     * Lines are scanned in order. The running tally holds, for every candidate
     * seen so far, the count from the most recent line in which it appeared;
     * a candidate is never removed once seen. When the tally was already
     * non-empty before a line and still holds fewer than two candidates after
     * merging that line, scanning stops early.
     *
     * @return string|null The candidate with the highest count, or null when
     *                     no candidate character was found at all.
     *
     * @throws UnexpectedValueException When the two highest counts are equal,
     *                                  i.e. the result is ambiguous.
     */
    public function getDelimiter()
    {
        $delimitersByLine = array();

        foreach ($this->fileContents as $line)
        {
            $delimiters = $this->countDelimiters($line);

            if (empty($delimitersByLine))
            {
                // Nothing collected yet: this line seeds the tally.
                $delimitersByLine = $delimiters;
                continue;
            }

            // Array union: counts from the current line take precedence and
            // candidates missing from it keep their previous count.
            $delimitersByLine = $delimiters + $delimitersByLine;

            if (count($delimitersByLine) < 2)
            {
                break;
            }
        }

        // Highest count first; keys (the delimiter characters) are preserved.
        arsort($delimitersByLine);
        $ranked = array_keys($delimitersByLine);

        if (count($ranked) === 0)
        {
            return null;
        }

        if (count($ranked) > 1
            && $delimitersByLine[$ranked[0]] === $delimitersByLine[$ranked[1]])
        {
            // multiple delimiters with the same frequency found
            // throw an error
            throw new UnexpectedValueException();
        }

        return $ranked[0];
    }

    /**
     * Counts the candidate delimiter characters of one line that lie outside
     * double-quoted sections.
     *
     * The whole line is scanned, including its last character, so a final line
     * without a trailing newline is counted in full. Line-ending characters are
     * neither quotes nor candidates and therefore never affect the counts.
     *
     * @param string $line One line of the file.
     *
     * @return array Map of delimiter character => number of occurrences.
     */
    private function countDelimiters($line)
    {
        $counts = array();
        $quoted = false;
        $length = strlen($line);

        for ($i = 0; $i < $length; $i++)
        {
            $char = $line[$i];

            if ($char === '"')
            {
                // Every double quote toggles the "inside quotes" state.
                $quoted = !$quoted;
                continue;
            }

            if ($quoted || !preg_match(self::ACCEPTABLE_DELIMITERS, $char))
            {
                continue;
            }

            $counts[$char] = isset($counts[$char]) ? $counts[$char] + 1 : 1;
        }

        return $counts;
    }
}
test.php
<?php
// Demo driver: prints the delimiter detected for each sample file, in order.
include('csv.php');

$sampleFiles = array(
    'comma.txt',
    'colon.txt',
    'pipe.txt',
    'pound.txt',
    'semicolon.txt',
    'mixture.txt',
);

foreach ($sampleFiles as $sampleFile)
{
    $detector = new CSV('files/' . $sampleFile);

    // Any exception raised by getDelimiter() is deliberately left uncaught,
    // so it aborts the script before the line for that file is printed.
    echo 'Delimiter for ' . $sampleFile . ' is ' . $detector->getDelimiter() . '<br />';
}
The Output:
Delimiter for comma.txt is ,
Delimiter for colon.txt is :
Delimiter for pipe.txt is |
Delimiter for pound.txt is #
Delimiter for semicolon.txt is ;
Fatal error: Uncaught exception 'UnexpectedValueException' in M:\SVN\sitepoint\trunk\Sitepoint\cancer10\csv.php:75 Stack trace: #0 M:\SVN\sitepoint\trunk\Sitepoint\cancer10\test.php(20): CSV->getDelimiter() #1 {main} thrown in M:\SVN\sitepoint\trunk\Sitepoint\cancer10\csv.php on line 75
As an attachment: