Support multi-line headers in COPY FROM command. · postgres/postgres@bc2f348 · GitHub | Latest TMZ Celebrity News & Gossip | Watch TMZ Live
Skip to content

Commit bc2f348

Browse files
committed
Support multi-line headers in COPY FROM command.
The COPY FROM command now accepts a non-negative integer for the HEADER option, allowing multiple header lines to be skipped. This is useful when the input contains multi-line headers that should be ignored during data import. Author: Shinya Kato <shinya11.kato@gmail.com> Co-authored-by: Fujii Masao <masao.fujii@gmail.com> Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp> Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
1 parent fd7d7b7 commit bc2f348

File tree

10 files changed

+138
-42
lines changed

10 files changed

+138
-42
lines changed

doc/src/sgml/ref/copy.sgml

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
3737
DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
3838
NULL '<replaceable class="parameter">null_string</replaceable>'
3939
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
40-
HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
40+
HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
4141
QUOTE '<replaceable class="parameter">quote_character</replaceable>'
4242
ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
4343
FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
212212
</listitem>
213213
</varlistentry>
214214

215+
<varlistentry>
216+
<term><replaceable class="parameter">integer</replaceable></term>
217+
<listitem>
218+
<para>
219+
Specifies a non-negative integer value passed to the selected option.
220+
</para>
221+
</listitem>
222+
</varlistentry>
223+
215224
<varlistentry>
216225
<term><literal>FORMAT</literal></term>
217226
<listitem>
@@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
303312
<term><literal>HEADER</literal></term>
304313
<listitem>
305314
<para>
306-
Specifies that the file contains a header line with the names of each
307-
column in the file. On output, the first line contains the column
308-
names from the table. On input, the first line is discarded when this
309-
option is set to <literal>true</literal> (or equivalent Boolean value).
310-
If this option is set to <literal>MATCH</literal>, the number and names
311-
of the columns in the header line must match the actual column names of
312-
the table, in order; otherwise an error is raised.
315+
On output, if this option is set to <literal>true</literal>
316+
(or an equivalent Boolean value), the first line of the output will
317+
contain the column names from the table.
318+
Integer values <literal>0</literal> and <literal>1</literal> are
319+
accepted as Boolean values, but other integers are not allowed for
320+
<command>COPY TO</command> commands.
321+
</para>
322+
<para>
323+
On input, if this option is set to <literal>true</literal>
324+
(or an equivalent Boolean value), the first line of the input is
325+
discarded. If set to a non-negative integer, that number of
326+
lines are discarded. If set to <literal>MATCH</literal>, the first line
327+
is discarded, and it must contain column names that exactly match the
328+
table's columns, in both number and order; otherwise, an error is raised.
329+
The <literal>MATCH</literal> value is only valid for
330+
<command>COPY FROM</command> commands.
331+
</para>
332+
<para>
313333
This option is not allowed when using <literal>binary</literal> format.
314-
The <literal>MATCH</literal> option is only valid for <command>COPY
315-
FROM</command> commands.
316334
</para>
317335
</listitem>
318336
</varlistentry>

src/backend/commands/copy.c

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
322322
}
323323

324324
/*
325-
* Extract a CopyHeaderChoice value from a DefElem. This is like
326-
* defGetBoolean() but also accepts the special value "match".
325+
* Extract the CopyFormatOptions.header_line value from a DefElem.
326+
*
327+
* Parses the HEADER option for COPY, which can be a boolean, a non-negative
328+
* integer (number of lines to skip), or the special value "match".
327329
*/
328-
static CopyHeaderChoice
329-
defGetCopyHeaderChoice(DefElem *def, bool is_from)
330+
static int
331+
defGetCopyHeaderOption(DefElem *def, bool is_from)
330332
{
331333
/*
332334
* If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
335337
return COPY_HEADER_TRUE;
336338

337339
/*
338-
* Allow 0, 1, "true", "false", "on", "off", or "match".
340+
* Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
341+
* "match".
339342
*/
340343
switch (nodeTag(def->arg))
341344
{
342345
case T_Integer:
343-
switch (intVal(def->arg))
344346
{
345-
case 0:
346-
return COPY_HEADER_FALSE;
347-
case 1:
348-
return COPY_HEADER_TRUE;
349-
default:
350-
/* otherwise, error out below */
351-
break;
347+
int ival = intVal(def->arg);
348+
349+
if (ival < 0)
350+
ereport(ERROR,
351+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
352+
errmsg("a negative integer value cannot be "
353+
"specified for %s", def->defname)));
354+
355+
if (!is_from && ival > 1)
356+
ereport(ERROR,
357+
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
358+
errmsg("cannot use multi-line header in COPY TO")));
359+
360+
return ival;
352361
}
353362
break;
354363
default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
381390
}
382391
ereport(ERROR,
383392
(errcode(ERRCODE_SYNTAX_ERROR),
384-
errmsg("%s requires a Boolean value or \"match\"",
393+
errmsg("%s requires a Boolean value, a non-negative integer, "
394+
"or the string \"match\"",
385395
def->defname)));
386396
return COPY_HEADER_FALSE; /* keep compiler quiet */
387397
}
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
566576
if (header_specified)
567577
errorConflictingDefElem(defel, pstate);
568578
header_specified = true;
569-
opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
579+
opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
570580
}
571581
else if (strcmp(defel->defname, "quote") == 0)
572582
{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
769779
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
770780

771781
/* Check header */
772-
if (opts_out->binary && opts_out->header_line)
782+
if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
773783
ereport(ERROR,
774784
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
775785
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */

src/backend/commands/copyfromparse.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
771771
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
772772
{
773773
int fldct;
774-
bool done;
774+
bool done = false;
775775

776776
/* only available for text or csv input */
777777
Assert(!cstate->opts.binary);
778778

779779
/* on input check that the header line is correct if needed */
780-
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
780+
if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
781781
{
782782
ListCell *cur;
783783
TupleDesc tupDesc;
784+
int lines_to_skip = cstate->opts.header_line;
785+
786+
/* If set to "match", one header line is skipped */
787+
if (cstate->opts.header_line == COPY_HEADER_MATCH)
788+
lines_to_skip = 1;
784789

785790
tupDesc = RelationGetDescr(cstate->rel);
786791

787-
cstate->cur_lineno++;
788-
done = CopyReadLine(cstate, is_csv);
792+
for (int i = 0; i < lines_to_skip; i++)
793+
{
794+
cstate->cur_lineno++;
795+
if ((done = CopyReadLine(cstate, is_csv)))
796+
break;
797+
}
789798

790799
if (cstate->opts.header_line == COPY_HEADER_MATCH)
791800
{

src/backend/commands/copyto.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
199199
cstate->file_encoding);
200200

201201
/* if a header has been requested send the line */
202-
if (cstate->opts.header_line)
202+
if (cstate->opts.header_line == COPY_HEADER_TRUE)
203203
{
204204
ListCell *cur;
205205
bool hdr_delim = false;

src/include/commands/copy.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,12 @@
2020
#include "tcop/dest.h"
2121

2222
/*
23-
* Represents whether a header line should be present, and whether it must
24-
* match the actual names (which implies "true").
23+
* Represents whether a header line must match the actual names
24+
* (which implies "true"), and whether it should be present.
2525
*/
26-
typedef enum CopyHeaderChoice
27-
{
28-
COPY_HEADER_FALSE = 0,
29-
COPY_HEADER_TRUE,
30-
COPY_HEADER_MATCH,
31-
} CopyHeaderChoice;
26+
#define COPY_HEADER_MATCH -1
27+
#define COPY_HEADER_FALSE 0
28+
#define COPY_HEADER_TRUE 1
3229

3330
/*
3431
* Represents where to save input processing errors. More values to be added
@@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
6461
bool binary; /* binary format? */
6562
bool freeze; /* freeze rows on loading? */
6663
bool csv_mode; /* Comma Separated Value format? */
67-
CopyHeaderChoice header_line; /* header line? */
64+
int header_line; /* number of lines to skip or COPY_HEADER_XXX
65+
* value (see the above) */
6866
char *null_print; /* NULL marker string (server encoding!) */
6967
int null_print_len; /* length of same */
7068
char *null_print_client; /* same converted to file encoding */

src/test/regress/expected/copy.out

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
8181
c1 colname with tab: \t
8282
1 a
8383
2 b
84+
-- test multi-line header line feature
85+
create temp table copytest5 (c1 int);
86+
copy copytest5 from stdin (format csv, header 2);
87+
copy copytest5 to stdout (header);
88+
c1
89+
1
90+
2
91+
truncate copytest5;
92+
copy copytest5 from stdin (format csv, header 4);
93+
select count(*) from copytest5;
94+
count
95+
-------
96+
0
97+
(1 row)
98+
99+
truncate copytest5;
100+
copy copytest5 from stdin (format csv, header 5);
101+
select count(*) from copytest5;
102+
count
103+
-------
104+
0
105+
(1 row)
106+
84107
-- test copy from with a partitioned table
85108
create table parted_copytest (
86109
a int,
@@ -224,7 +247,7 @@ alter table header_copytest add column c text;
224247
copy header_copytest to stdout with (header match);
225248
ERROR: cannot use "match" with HEADER in COPY TO
226249
copy header_copytest from stdin with (header wrong_choice);
227-
ERROR: header requires a Boolean value or "match"
250+
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
228251
-- works
229252
copy header_copytest from stdin with (header match);
230253
copy header_copytest (c, a, b) from stdin with (header match);

src/test/regress/expected/copy2.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
132132
ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
133133
COPY x from stdin with (on_error ignore, reject_limit 0);
134134
ERROR: REJECT_LIMIT (0) must be greater than zero
135+
COPY x from stdin with (header -1);
136+
ERROR: a negative integer value cannot be specified for header
137+
COPY x from stdin with (header 2.5);
138+
ERROR: header requires a Boolean value, a non-negative integer, or the string "match"
139+
COPY x to stdout with (header 2);
140+
ERROR: cannot use multi-line header in COPY TO
135141
-- too many columns in column list: should fail
136142
COPY x (a, b, c, d, e, d, c) from stdin;
137143
ERROR: column "d" specified more than once

src/test/regress/sql/copy.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
9494

9595
copy copytest4 to stdout (header);
9696

97+
-- test multi-line header line feature
98+
99+
create temp table copytest5 (c1 int);
100+
101+
copy copytest5 from stdin (format csv, header 2);
102+
this is a first header line.
103+
this is a second header line.
104+
1
105+
2
106+
\.
107+
copy copytest5 to stdout (header);
108+
109+
truncate copytest5;
110+
copy copytest5 from stdin (format csv, header 4);
111+
this is a first header line.
112+
this is a second header line.
113+
1
114+
2
115+
\.
116+
select count(*) from copytest5;
117+
118+
truncate copytest5;
119+
copy copytest5 from stdin (format csv, header 5);
120+
this is a first header line.
121+
this is a second header line.
122+
1
123+
2
124+
\.
125+
select count(*) from copytest5;
126+
97127
-- test copy from with a partitioned table
98128
create table parted_copytest (
99129
a int,

src/test/regress/sql/copy2.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
9090
COPY x from stdin (log_verbosity unsupported);
9191
COPY x from stdin with (reject_limit 1);
9292
COPY x from stdin with (on_error ignore, reject_limit 0);
93+
COPY x from stdin with (header -1);
94+
COPY x from stdin with (header 2.5);
95+
COPY x to stdout with (header 2);
9396

9497
-- too many columns in column list: should fail
9598
COPY x (a, b, c, d, e, d, c) from stdin;

src/tools/pgindent/typedefs.list

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,6 @@ CopyFormatOptions
521521
CopyFromRoutine
522522
CopyFromState
523523
CopyFromStateData
524-
CopyHeaderChoice
525524
CopyInsertMethod
526525
CopyLogVerbosityChoice
527526
CopyMethod

0 commit comments

Comments
 (0)

TMZ Celebrity News – Breaking Stories, Videos & Gossip

Looking for the latest TMZ celebrity news? You've come to the right place. From shocking Hollywood scandals to exclusive videos, TMZ delivers it all in real time.

Whether it’s a red carpet slip-up, a viral paparazzi moment, or a legal drama involving your favorite stars, TMZ news is always first to break the story. Stay in the loop with daily updates, insider tips, and jaw-dropping photos.

🎥 Watch TMZ Live

TMZ Live brings you daily celebrity news and interviews straight from the TMZ newsroom. Don’t miss a beat—watch now and see what’s trending in Hollywood.