X-Git-Url: http://rtime.felk.cvut.cz/gitweb/hercules2020/kcf.git/blobdiff_plain/da3cc763bfdd64cdfee20e3346c04e2aa02ff6ad..HEAD:/main_vot.cpp diff --git a/main_vot.cpp b/main_vot.cpp index 56cf73d..fe5fb78 100644 --- a/main_vot.cpp +++ b/main_vot.cpp @@ -1,65 +1,138 @@ #include #include #include +#include +#include #include "kcf.h" #include "vot.hpp" +double calcAccuracy(std::string line, cv::Rect bb_rect, cv::Rect &groundtruth_rect) +{ + std::vector numbers; + std::istringstream s(line); + float x; + char ch; + + while (s >> x) { + numbers.push_back(x); + s >> ch; + } + double x1 = std::min(numbers[0], std::min(numbers[2], std::min(numbers[4], numbers[6]))); + double x2 = std::max(numbers[0], std::max(numbers[2], std::max(numbers[4], numbers[6]))); + double y1 = std::min(numbers[1], std::min(numbers[3], std::min(numbers[5], numbers[7]))); + double y2 = std::max(numbers[1], std::max(numbers[3], std::max(numbers[5], numbers[7]))); + + groundtruth_rect = cv::Rect(x1, y1, x2 - x1, y2 - y1); + + double rects_intersection = (groundtruth_rect & bb_rect).area(); + double rects_union = (groundtruth_rect | bb_rect).area(); + double accuracy = rects_intersection / rects_union; + + return accuracy; +} + int main(int argc, char *argv[]) { //load region, images and prepare for output - std::string region, images, output; - int visualize_delay = -1; + std::string region, images, output, video_out; + int visualize_delay = -1, fit_size_x = -1, fit_size_y = -1; + KCF_Tracker tracker; + cv::VideoWriter videoWriter; while (1) { int option_index = 0; static struct option long_options[] = { + {"debug", no_argument, 0, 'd' }, + {"visual_debug", optional_argument, 0, 'p'}, {"help", no_argument, 0, 'h' }, + {"output", required_argument, 0, 'o' }, + {"video_out", optional_argument, 0, 'O' }, {"visualize", optional_argument, 0, 'v' }, + {"fit", optional_argument, 0, 'f' }, {0, 0, 0, 0 } }; - int c = getopt_long(argc, argv, "hv::", - long_options, &option_index); + int c = getopt_long(argc, argv, "dp::hv::f::o:O::", long_options, &option_index); if (c == -1) break; switch (c) { + case 'd': + tracker.m_debug = true; + break; + case 'p': + if (!optarg || *optarg == 'p') + tracker.m_visual_debug = KCF_Tracker::vd::PATCH; + else if (optarg && *optarg == 'r') + tracker.m_visual_debug = KCF_Tracker::vd::RESPONSE; + else { + fprintf(stderr, "Unknown visual debug mode: %c", *optarg); + return 1; + } + break; case 'h': std::cerr << "Usage: \n" << argv[0] << " [options]\n" << argv[0] << " [options] \n" - << argv[0] << " [options] [path/to/output.txt]\n" + << argv[0] << " [options] [path/to/output.txt]\n" << "Options:\n" - << " --visualize | -v [delay_ms]\n"; + << " --visualize | -v[delay_ms]\n" + << " --output | -o \n" + << " --fit | -f[W[xH]]\n" + << " --debug | -d\n" + << " --visual_debug | -p [p|r]\n"; exit(0); break; + case 'o': + output = optarg; + break; + case 'O': + video_out = optarg ? optarg : "./output.avi"; + break; case 'v': visualize_delay = optarg ? atol(optarg) : 1; break; + case 'f': + if (!optarg) { + fit_size_x = fit_size_y = 0; + } else { + char tail; + if (sscanf(optarg, "%d%c", &fit_size_x, &tail) == 1) { + fit_size_y = fit_size_x; + } else if (sscanf(optarg, "%dx%d%c", &fit_size_x, &fit_size_y, &tail) != 2) { + fprintf(stderr, "Cannot parse -f argument: %s\n", optarg); + return 1; + } + } + break; } } switch (argc - optind) { + case 1: + if (chdir(argv[optind]) == -1) { + perror(argv[optind]); + exit(1); + } + // Fall through case 0: - region = "region.txt"; + region = access("groundtruth.txt", F_OK) == 0 ? "groundtruth.txt" : "region.txt"; images = "images.txt"; - output = "output.txt"; - break; - case 1: - region = std::string(argv[optind]) + "/region.txt"; - images = std::string(argv[optind]) + "/images.txt"; - output = std::string(argv[optind]) + "/output.txt"; + if (output.empty()) + output = "output.txt"; break; case 2: // Fall through case 3: region = std::string(argv[optind + 0]); images = std::string(argv[optind + 1]); - if ((argc - optind) == 3) - output = std::string(argv[optind + 2]); - else - output = std::string(dirname(argv[optind + 0])) + "/output.txt"; + if (output.empty()) { + if ((argc - optind) == 3) + output = std::string(argv[optind + 2]); + else + output = std::string(dirname(argv[optind + 0])) + "/output.txt"; + } break; default: std::cerr << "Too many arguments\n"; @@ -67,7 +140,14 @@ int main(int argc, char *argv[]) } VOT vot_io(region, images, output); - KCF_Tracker tracker; + // if groundtruth.txt is used use intersection over union (IOU) to calculate tracker accuracy + std::ifstream groundtruth_stream; + if (region.compare("groundtruth.txt") == 0) { + groundtruth_stream.open(region.c_str()); + std::string line; + std::getline(groundtruth_stream, line); + } + cv::Mat image; //img = firts frame, initPos = initial position in the first frame @@ -75,28 +155,68 @@ int main(int argc, char *argv[]) vot_io.outputBoundingBox(init_rect); vot_io.getNextImage(image); - tracker.init(image, init_rect); + if (!video_out.empty()) { + int codec = CV_FOURCC('M', 'J', 'P', 'G'); // select desired codec (must be available at runtime) + double fps = 25.0; // framerate of the created video stream + videoWriter.open(video_out, codec, fps, image.size(), true); + } + + tracker.init(image, init_rect, fit_size_x, fit_size_y); + BBox_c bb; - double avg_time = 0.; + cv::Rect bb_rect; + double avg_time = 0., sum_accuracy = 0.; int frames = 0; + + std::cout << std::fixed << std::setprecision(2); + while (vot_io.getNextImage(image) == 1){ double time_profile_counter = cv::getCPUTickCount(); tracker.track(image); time_profile_counter = cv::getCPUTickCount() - time_profile_counter; - std::cout << " -> speed : " << time_profile_counter/((double)cvGetTickFrequency()*1000) << "ms. per frame" << std::endl; + std::cout << " -> speed : " << time_profile_counter/((double)cvGetTickFrequency()*1000) << "ms per frame, " + "response : " << tracker.getFilterResponse(); avg_time += time_profile_counter/((double)cvGetTickFrequency()*1000); frames++; bb = tracker.getBBox(); - vot_io.outputBoundingBox(cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h)); - - if (visualize_delay >= 0) { - cv::rectangle(image, cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h), CV_RGB(0,255,0), 2); - cv::imshow("output", image); - int ret = cv::waitKey(visualize_delay); - if (visualize_delay > 0 && ret != -1 && ret != 255) - break; + bb_rect = cv::Rect(bb.cx - bb.w/2., bb.cy - bb.h/2., bb.w, bb.h); + vot_io.outputBoundingBox(bb_rect); + + if (groundtruth_stream.is_open()) { + std::string line; + std::getline(groundtruth_stream, line); + + cv::Rect groundtruthRect; + double accuracy = calcAccuracy(line, bb_rect, groundtruthRect); + if (visualize_delay >= 0) + cv::rectangle(image, groundtruthRect, CV_RGB(255, 0,0), 1); + std::cout << ", accuracy: " << accuracy; + sum_accuracy += accuracy; + } + + std::cout << std::endl; + + if (visualize_delay >= 0 || !video_out.empty()) { + cv::Point pt(bb.cx, bb.cy); + cv::Size size(bb.w, bb.h); + cv::RotatedRect rotatedRectangle(pt, size, bb.a); + + cv::Point2f vertices[4]; + rotatedRectangle.points(vertices); + + for (int i = 0; i < 4; i++) + cv::line(image, vertices[i], vertices[(i + 1) % 4], cv::Scalar(0, 255, 0), 2); + if (visualize_delay >= 0) { + cv::imshow("KCF output", image); + int ret = cv::waitKey(visualize_delay); + if ((visualize_delay > 0 && ret != -1 && ret < 128) || + (visualize_delay == 0 && (ret == 27 /*esc*/ || ret == 'q'))) + break; + } + if (!video_out.empty()) + videoWriter << image; } // std::stringstream s; @@ -120,7 +240,14 @@ int main(int argc, char *argv[]) // cv::imwrite(ss.c_str(), image, compression_params); } - std::cout << "Average processing speed " << avg_time/frames << "ms. (" << 1./(avg_time/frames)*1000 << " fps)" << std::endl; + std::cout << "Average processing speed: " << avg_time / frames << "ms (" << 1. / (avg_time / frames) * 1000 << " fps)"; + if (groundtruth_stream.is_open()) { + std::cout << "; Average accuracy: " << sum_accuracy/frames << std::endl; + groundtruth_stream.close(); + } + if (!video_out.empty()) + videoWriter.release(); + std::cout << std::endl; return EXIT_SUCCESS; }