/*
 * Copyright (C) 2003-2014 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

20
#include "config.h"
Max Kellermann's avatar
Max Kellermann committed
21
#include "TagId3.hxx"
22
#include "TagHandler.hxx"
23
#include "TagTable.hxx"
24
#include "TagBuilder.hxx"
25
#include "util/Alloc.hxx"
26
#include "util/StringUtil.hxx"
27
#include "util/Error.hxx"
28 29
#include "util/Domain.hxx"
#include "Log.hxx"
30
#include "config/ConfigGlobal.hxx"
31 32
#include "Riff.hxx"
#include "Aiff.hxx"
33 34
#include "fs/Path.hxx"
#include "fs/FileSystem.hxx"
Max Kellermann's avatar
Max Kellermann committed
35

36
#ifdef HAVE_GLIB
37
#include <glib.h>
38 39
#endif

40 41
#include <id3tag.h>

42 43
#include <string>

44
#include <stdio.h>
45
#include <stdlib.h>
46 47
#include <string.h>

48 49 50 51 52 53 54
/*
 * Fallback definitions for frame ids used below; each one is only
 * defined here if the included headers have not defined it already.
 */
#ifndef ID3_FRAME_COMPOSER
#define ID3_FRAME_COMPOSER "TCOM"
#endif

#ifndef ID3_FRAME_DISC
#define ID3_FRAME_DISC "TPOS"
#endif

#ifndef ID3_FRAME_ARTIST_SORT
#define ID3_FRAME_ARTIST_SORT "TSOP"
#endif

#ifndef ID3_FRAME_ALBUM_ARTIST_SORT
/* this one is unofficial, introduced by iTunes */
#define ID3_FRAME_ALBUM_ARTIST_SORT "TSO2"
#endif

#ifndef ID3_FRAME_ALBUM_ARTIST
#define ID3_FRAME_ALBUM_ARTIST "TPE2"
#endif

67 68
/** The log domain under which this file reports its warnings. */
static constexpr Domain id3_domain("id3");

69 70 71 72 73 74
/**
 * Check whether the ID3V1 option flag is set on the given tag.
 */
static inline bool
tag_is_id3v1(struct id3_tag *tag)
{
	/* mask and values are both zero: the call is only used to
	   obtain the option bits */
	const int options = id3_tag_options(tag, 0, 0);
	return (options & ID3_TAG_OPTION_ID3V1) != 0;
}

75 76 77
/**
 * Return the string stored in field number #i of the frame as a UTF-8
 * duplicate.
 *
 * @return the duplicated string (the callers in this file release it
 * with free()), or nullptr if the field does not exist or holds no
 * string
 */
static id3_utf8_t *
tag_id3_getstring(const struct id3_frame *frame, unsigned i)
{
	id3_field *field = id3_frame_field(frame, i);
	if (field == nullptr)
		return nullptr;

	const id3_ucs4_t *ucs4 = id3_field_getstring(field);
	if (ucs4 == nullptr)
		return nullptr;

	return id3_ucs4_utf8duplicate(ucs4);
}

89 90
/* This will try to convert a string to utf-8,
 */
Max Kellermann's avatar
Max Kellermann committed
91
static id3_utf8_t *
92
import_id3_string(bool is_id3v1, const id3_ucs4_t *ucs4)
93
{
94
	id3_utf8_t *utf8;
95

96
#ifdef HAVE_GLIB
97
	/* use encoding field here? */
98
	const char *encoding;
99
	if (is_id3v1 &&
Max Kellermann's avatar
Max Kellermann committed
100
	    (encoding = config_get_string(CONF_ID3V1_ENCODING, nullptr)) != nullptr) {
101
		id3_latin1_t *isostr = id3_ucs4_latin1duplicate(ucs4);
102
		if (gcc_unlikely(isostr == nullptr))
Max Kellermann's avatar
Max Kellermann committed
103
			return nullptr;
104 105 106

		utf8 = (id3_utf8_t *)
			g_convert_with_fallback((const char*)isostr, -1,
107
						"utf-8", encoding,
Max Kellermann's avatar
Max Kellermann committed
108 109 110
						nullptr, nullptr,
						nullptr, nullptr);
		if (utf8 == nullptr) {
111 112 113
			FormatWarning(id3_domain,
				      "Unable to convert %s string to UTF-8: '%s'",
				      encoding, isostr);
114
			free(isostr);
Max Kellermann's avatar
Max Kellermann committed
115
			return nullptr;
116
		}
117
		free(isostr);
118
	} else {
119 120 121
#else
		(void)is_id3v1;
#endif
122
		utf8 = id3_ucs4_utf8duplicate(ucs4);
123
		if (gcc_unlikely(utf8 == nullptr))
Max Kellermann's avatar
Max Kellermann committed
124
			return nullptr;
125
#ifdef HAVE_GLIB
126
	}
127
#endif
128

129
	id3_utf8_t *utf8_stripped = (id3_utf8_t *)
130
		xstrdup(Strip((char *)utf8));
131
	free(utf8);
132 133

	return utf8_stripped;
134 135
}

136 137 138 139 140 141 142
/**
 * Import a "Text information frame" (ID3v2.4.0 section 4.2).  It
 * contains 2 fields:
 *
 * - encoding
 * - string list
 */
143
static void
144
tag_id3_import_text_frame(struct id3_tag *tag, const struct id3_frame *frame,
145
			  TagType type,
146
			  const struct tag_handler *handler, void *handler_ctx)
147
{
148
	if (frame->nfields != 2)
149
		return;
150 151 152

	/* check the encoding field */

153
	const id3_field *field = id3_frame_field(frame, 0);
Max Kellermann's avatar
Max Kellermann committed
154
	if (field == nullptr || field->type != ID3_FIELD_TYPE_TEXTENCODING)
155
		return;
156

157 158 159
	/* process the value(s) */

	field = id3_frame_field(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
160
	if (field == nullptr || field->type != ID3_FIELD_TYPE_STRINGLIST)
161 162 163
		return;

	/* Get the number of strings available */
164 165 166
	const unsigned nstrings = id3_field_getnstrings(field);
	for (unsigned i = 0; i < nstrings; i++) {
		const id3_ucs4_t *ucs4 = id3_field_getstrings(field, i);
Max Kellermann's avatar
Max Kellermann committed
167
		if (ucs4 == nullptr)
168 169
			continue;

170
		if (type == TAG_GENRE)
171 172
			ucs4 = id3_genre_name(ucs4);

173
		id3_utf8_t *utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
Max Kellermann's avatar
Max Kellermann committed
174
		if (utf8 == nullptr)
175 176
			continue;

177 178
		tag_handler_invoke_tag(handler, handler_ctx,
				       type, (const char *)utf8);
179
		free(utf8);
180
	}
181 182
}

183 184 185 186 187
/**
 * Import all text frames with the specified id (ID3v2.4.0 section
 * 4.2).  This is a wrapper for tag_id3_import_text_frame().
 */
static void
188
tag_id3_import_text(struct id3_tag *tag, const char *id, TagType type,
189
		    const struct tag_handler *handler, void *handler_ctx)
190 191 192
{
	const struct id3_frame *frame;
	for (unsigned i = 0;
Max Kellermann's avatar
Max Kellermann committed
193
	     (frame = id3_tag_findframe(tag, id, i)) != nullptr; ++i)
194 195
		tag_id3_import_text_frame(tag, frame, type,
					  handler, handler_ctx);
196 197
}

198 199 200 201 202 203 204 205 206 207
/**
 * Import a "Comment frame" (ID3v2.4.0 section 4.10).  It
 * contains 4 fields:
 *
 * - encoding
 * - language
 * - string
 * - full string (we use this one)
 */
static void
208
tag_id3_import_comment_frame(struct id3_tag *tag,
209
			     const struct id3_frame *frame, TagType type,
210 211
			     const struct tag_handler *handler,
			     void *handler_ctx)
212
{
213
	if (frame->nfields != 4)
214 215 216
		return;

	/* for now I only read the 4th field, with the fullstring */
217
	const id3_field *field = id3_frame_field(frame, 3);
Max Kellermann's avatar
Max Kellermann committed
218
	if (field == nullptr)
219 220
		return;

221
	const id3_ucs4_t *ucs4 = id3_field_getfullstring(field);
Max Kellermann's avatar
Max Kellermann committed
222
	if (ucs4 == nullptr)
223 224
		return;

225
	id3_utf8_t *utf8 = import_id3_string(tag_is_id3v1(tag), ucs4);
Max Kellermann's avatar
Max Kellermann committed
226
	if (utf8 == nullptr)
227 228
		return;

229
	tag_handler_invoke_tag(handler, handler_ctx, type, (const char *)utf8);
230
	free(utf8);
231 232
}

233 234 235 236 237
/**
 * Import all comment frames (ID3v2.4.0 section 4.10).  This is a
 * wrapper for tag_id3_import_comment_frame().
 */
static void
238
tag_id3_import_comment(struct id3_tag *tag, const char *id, TagType type,
239
		       const struct tag_handler *handler, void *handler_ctx)
240 241 242
{
	const struct id3_frame *frame;
	for (unsigned i = 0;
Max Kellermann's avatar
Max Kellermann committed
243
	     (frame = id3_tag_findframe(tag, id, i)) != nullptr; ++i)
244 245
		tag_id3_import_comment_frame(tag, frame, type,
					     handler, handler_ctx);
246 247
}

248
/**
249
 * Parse a TXXX name, and convert it to a TagType enum value.
250 251
 * Returns TAG_NUM_OF_ITEM_TYPES if the TXXX name is not understood.
 */
252
static TagType
253 254
tag_id3_parse_txxx_name(const char *name)
{
255 256 257 258 259 260 261
	static const struct tag_table txxx_tags[] = {
		{ "ALBUMARTISTSORT", TAG_ALBUM_ARTIST_SORT },
		{ "MusicBrainz Artist Id", TAG_MUSICBRAINZ_ARTISTID },
		{ "MusicBrainz Album Id", TAG_MUSICBRAINZ_ALBUMID },
		{ "MusicBrainz Album Artist Id",
		  TAG_MUSICBRAINZ_ALBUMARTISTID },
		{ "MusicBrainz Track Id", TAG_MUSICBRAINZ_TRACKID },
Max Kellermann's avatar
Max Kellermann committed
262
		{ nullptr, TAG_NUM_OF_ITEM_TYPES }
263 264
	};

265
	return tag_table_lookup(txxx_tags, name);
266 267 268 269 270 271
}

/**
 * Import all known MusicBrainz tags from TXXX frames.
 */
static void
272 273 274
tag_id3_import_musicbrainz(struct id3_tag *id3_tag,
			   const struct tag_handler *handler,
			   void *handler_ctx)
275 276
{
	for (unsigned i = 0;; ++i) {
277
		const id3_frame *frame = id3_tag_findframe(id3_tag, "TXXX", i);
Max Kellermann's avatar
Max Kellermann committed
278
		if (frame == nullptr)
279 280
			break;

281
		id3_utf8_t *name = tag_id3_getstring(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
282
		if (name == nullptr)
283 284
			continue;

285
		id3_utf8_t *value = tag_id3_getstring(frame, 2);
Max Kellermann's avatar
Max Kellermann committed
286
		if (value == nullptr)
287 288
			continue;

289 290 291 292
		tag_handler_invoke_pair(handler, handler_ctx,
					(const char *)name,
					(const char *)value);

293
		TagType type = tag_id3_parse_txxx_name((const char*)name);
294 295 296 297 298 299
		free(name);

		if (type != TAG_NUM_OF_ITEM_TYPES)
			tag_handler_invoke_tag(handler, handler_ctx,
					       type, (const char*)value);

300 301 302 303
		free(value);
	}
}

304 305 306 307
/**
 * Imports the MusicBrainz TrackId from the UFID tag.
 */
static void
308 309
tag_id3_import_ufid(struct id3_tag *id3_tag,
		    const struct tag_handler *handler, void *handler_ctx)
310 311
{
	for (unsigned i = 0;; ++i) {
312
		const id3_frame *frame = id3_tag_findframe(id3_tag, "UFID", i);
Max Kellermann's avatar
Max Kellermann committed
313
		if (frame == nullptr)
314 315
			break;

316
		id3_field *field = id3_frame_field(frame, 0);
Max Kellermann's avatar
Max Kellermann committed
317
		if (field == nullptr)
318 319
			continue;

320
		const id3_latin1_t *name = id3_field_getlatin1(field);
Max Kellermann's avatar
Max Kellermann committed
321
		if (name == nullptr ||
322 323 324 325
		    strcmp((const char *)name, "http://musicbrainz.org") != 0)
			continue;

		field = id3_frame_field(frame, 1);
Max Kellermann's avatar
Max Kellermann committed
326
		if (field == nullptr)
327 328
			continue;

329 330 331
		id3_length_t length;
		const id3_byte_t *value =
			id3_field_getbinarydata(field, &length);
Max Kellermann's avatar
Max Kellermann committed
332
		if (value == nullptr || length == 0)
333 334
			continue;

335
		std::string p((const char *)value, length);
336
		tag_handler_invoke_tag(handler, handler_ctx,
337
				       TAG_MUSICBRAINZ_TRACKID, p.c_str());
338 339 340
	}
}

341
void
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
scan_id3_tag(struct id3_tag *tag,
	     const struct tag_handler *handler, void *handler_ctx)
{
	tag_id3_import_text(tag, ID3_FRAME_ARTIST, TAG_ARTIST,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ALBUM_ARTIST,
			    TAG_ALBUM_ARTIST, handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ARTIST_SORT,
			    TAG_ARTIST_SORT, handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ALBUM_ARTIST_SORT,
			    TAG_ALBUM_ARTIST_SORT, handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_TITLE, TAG_TITLE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_ALBUM, TAG_ALBUM,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_TRACK, TAG_TRACK,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_YEAR, TAG_DATE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_GENRE, TAG_GENRE,
			    handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_COMPOSER, TAG_COMPOSER,
			    handler, handler_ctx);
	tag_id3_import_text(tag, "TPE3", TAG_PERFORMER,
			    handler, handler_ctx);
	tag_id3_import_text(tag, "TPE4", TAG_PERFORMER, handler, handler_ctx);
	tag_id3_import_comment(tag, ID3_FRAME_COMMENT, TAG_COMMENT,
			       handler, handler_ctx);
	tag_id3_import_text(tag, ID3_FRAME_DISC, TAG_DISC,
			    handler, handler_ctx);

	tag_id3_import_musicbrainz(tag, handler, handler_ctx);
	tag_id3_import_ufid(tag, handler, handler_ctx);
}

Max Kellermann's avatar
Max Kellermann committed
377 378
/**
 * Convert an ID3 tag to a newly created #Tag object by scanning it
 * into a #TagBuilder.
 *
 * @return the result of TagBuilder::CommitNew(), or nullptr if the
 * scan did not add anything to the builder
 */
Tag *
tag_id3_import(struct id3_tag *tag)
{
	TagBuilder tag_builder;
	scan_id3_tag(tag, &add_tag_handler, &tag_builder);
	return tag_builder.IsEmpty()
		? nullptr
		: tag_builder.CommitNew();
}

387
/**
 * Seek to the given position of the stream and read up to #size
 * bytes from there into #buf.
 *
 * @param offset, whence the position, as for fseek()
 * @return the number of bytes which were read (may be less than
 * #size), or 0 if seeking has failed
 */
static size_t
fill_buffer(void *buf, size_t size, FILE *stream, long offset, int whence)
{
	if (fseek(stream, offset, whence) != 0)
		return 0;

	return fread(buf, 1, size, stream);
}

394
static long
Max Kellermann's avatar
Max Kellermann committed
395
get_id3v2_footer_size(FILE *stream, long offset, int whence)
396 397
{
	id3_byte_t buf[ID3_TAG_QUERYSIZE];
398 399
	size_t bufsize = fill_buffer(buf, ID3_TAG_QUERYSIZE, stream, offset, whence);
	if (bufsize == 0) return 0;
400 401 402
	return id3_tag_query(buf, bufsize);
}

Max Kellermann's avatar
Max Kellermann committed
403 404
static struct id3_tag *
tag_id3_read(FILE *stream, long offset, int whence)
405 406
{
	/* It's ok if we get less than we asked for */
407 408 409
	id3_byte_t query_buffer[ID3_TAG_QUERYSIZE];
	size_t query_buffer_size = fill_buffer(query_buffer, ID3_TAG_QUERYSIZE,
					       stream, offset, whence);
Max Kellermann's avatar
Max Kellermann committed
410 411
	if (query_buffer_size <= 0)
		return nullptr;
412 413

	/* Look for a tag header */
414
	long tag_size = id3_tag_query(query_buffer, query_buffer_size);
Max Kellermann's avatar
Max Kellermann committed
415
	if (tag_size <= 0) return nullptr;
416 417

	/* Found a tag.  Allocate a buffer and read it in. */
418
	id3_byte_t *tag_buffer = new id3_byte_t[tag_size];
Max Kellermann's avatar
Max Kellermann committed
419 420
	int tag_buffer_size = fill_buffer(tag_buffer, tag_size,
					  stream, offset, whence);
Max Kellermann's avatar
Max Kellermann committed
421
	if (tag_buffer_size < tag_size) {
422
		delete[] tag_buffer;
Max Kellermann's avatar
Max Kellermann committed
423
		return nullptr;
424 425
	}

426
	id3_tag *tag = id3_tag_parse(tag_buffer, tag_buffer_size);
427
	delete[] tag_buffer;
428 429 430
	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
431 432
static struct id3_tag *
tag_id3_find_from_beginning(FILE *stream)
433
{
434
	id3_tag *tag = tag_id3_read(stream, 0, SEEK_SET);
435
	if (!tag) {
Max Kellermann's avatar
Max Kellermann committed
436
		return nullptr;
Max Kellermann's avatar
Max Kellermann committed
437
	} else if (tag_is_id3v1(tag)) {
438 439
		/* id3v1 tags don't belong here */
		id3_tag_delete(tag);
Max Kellermann's avatar
Max Kellermann committed
440
		return nullptr;
441 442 443
	}

	/* We have an id3v2 tag, so let's look for SEEK frames */
444
	id3_frame *frame;
445 446
	while ((frame = id3_tag_findframe(tag, "SEEK", 0))) {
		/* Found a SEEK frame, get it's value */
447
		int seek = id3_field_getint(id3_frame_field(frame, 0));
448 449 450 451
		if (seek < 0)
			break;

		/* Get the tag specified by the SEEK frame */
452
		id3_tag *seektag = tag_id3_read(stream, seek, SEEK_CUR);
Max Kellermann's avatar
Max Kellermann committed
453
		if (!seektag || tag_is_id3v1(seektag))
454 455 456 457 458 459 460 461 462 463
			break;

		/* Replace the old tag with the new one */
		id3_tag_delete(tag);
		tag = seektag;
	}

	return tag;
}

Max Kellermann's avatar
Max Kellermann committed
464 465
static struct id3_tag *
tag_id3_find_from_end(FILE *stream)
466 467
{
	/* Get an id3v1 tag from the end of file for later use */
468
	id3_tag *v1tag = tag_id3_read(stream, -128, SEEK_END);
469 470

	/* Get the id3v2 tag size from the footer (located before v1tag) */
471
	int tagsize = get_id3v2_footer_size(stream, (v1tag ? -128 : 0) - 10, SEEK_END);
472 473 474 475
	if (tagsize >= 0)
		return v1tag;

	/* Get the tag which the footer belongs to */
476
	id3_tag *tag = tag_id3_read(stream, tagsize, SEEK_CUR);
477 478 479 480 481 482 483 484 485
	if (!tag)
		return v1tag;

	/* We have an id3v2 tag, so ditch v1tag */
	id3_tag_delete(v1tag);

	return tag;
}

486
static struct id3_tag *
487
tag_id3_riff_aiff_load(FILE *file)
488
{
Max Kellermann's avatar
Max Kellermann committed
489
	size_t size = riff_seek_id3(file);
490 491
	if (size == 0)
		size = aiff_seek_id3(file);
492
	if (size == 0)
Max Kellermann's avatar
Max Kellermann committed
493
		return nullptr;
494

495
	if (size > 4 * 1024 * 1024)
496
		/* too large, don't allocate so much memory */
Max Kellermann's avatar
Max Kellermann committed
497
		return nullptr;
498

499
	id3_byte_t *buffer = new id3_byte_t[size];
Max Kellermann's avatar
Max Kellermann committed
500
	size_t ret = fread(buffer, size, 1, file);
501
	if (ret != 1) {
502
		LogWarning(id3_domain, "Failed to read RIFF chunk");
503
		delete[] buffer;
Max Kellermann's avatar
Max Kellermann committed
504
		return nullptr;
505 506
	}

Max Kellermann's avatar
Max Kellermann committed
507
	struct id3_tag *tag = id3_tag_parse(buffer, size);
508
	delete[] buffer;
509 510 511
	return tag;
}

512
struct id3_tag *
513
tag_id3_load(Path path_fs, Error &error)
514
{
515
	FILE *file = FOpen(path_fs, "rb");
Max Kellermann's avatar
Max Kellermann committed
516
	if (file == nullptr) {
517
		error.FormatErrno("Failed to open file %s", path_fs);
Max Kellermann's avatar
Max Kellermann committed
518
		return nullptr;
519
	}
520

521
	struct id3_tag *tag = tag_id3_find_from_beginning(file);
Max Kellermann's avatar
Max Kellermann committed
522
	if (tag == nullptr) {
523
		tag = tag_id3_riff_aiff_load(file);
Max Kellermann's avatar
Max Kellermann committed
524
		if (tag == nullptr)
525
			tag = tag_id3_find_from_end(file);
526 527
	}

528 529 530
	fclose(file);
	return tag;
}
531

532
bool
533
tag_id3_scan(Path path_fs,
534 535
	     const struct tag_handler *handler, void *handler_ctx)
{
536 537
	Error error;
	struct id3_tag *tag = tag_id3_load(path_fs, error);
Max Kellermann's avatar
Max Kellermann committed
538
	if (tag == nullptr) {
539
		if (error.IsDefined())
540
			LogError(error);
541 542

		return false;
543
	}
544 545 546 547 548

	scan_id3_tag(tag, handler, handler_ctx);
	id3_tag_delete(tag);
	return true;
}